feat: wikimedia foundation methods
Tomeriko96 committed Jan 20, 2024
1 parent d0c0a88 commit 1cedf8f
Showing 9 changed files with 242 additions and 0 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
@@ -13,5 +13,8 @@ export(linguee_translation_examples)
export(linguee_word_translation)
export(mymemory_translate)
export(translate_file)
export(wikimedia_detect_language)
export(wikipedia_get_language_names)
export(wmcloud_translate)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
45 changes: 45 additions & 0 deletions R/wikimedia_detect_language.R
@@ -0,0 +1,45 @@
#' Detect the language of a text
#'
#' This function sends a POST request to the Wikimedia Language ID API with the specified text,
#' parses the JSON response, and returns the detected language.
#'
#' @param text The text whose language is to be detected.
#'
#' @return The detected language code (wikicode) reported by the API.
#'
#' @examples
#' # Detect the language of a text
#' wikimedia_detect_language("Hallo, wereld")
#'
#' @export
wikimedia_detect_language <- function(text) {
  # Define the URL of the API
  url <- "https://api.wikimedia.org/service/lw/inference/v1/models/langid:predict"

  # Create a list of parameters to send in the POST request
  body <- list(
    text = text
  )

  # Convert the list to a JSON string
  json_body <- jsonlite::toJSON(body, auto_unbox = TRUE)

  # Set the content type of the request to 'application/json'
  headers <- c("Content-Type" = "application/json")

  # Send the POST request and get the response
  response <- httr::POST(url, body = json_body, httr::add_headers(headers))

  # Check if the request was successful
  if (httr::status_code(response) != 200) {
    stop("Request failed with status ", httr::status_code(response))
  }

  # Parse the response
  result <- httr::content(response, "parsed")

  # Extract the detected language code from the response
  language <- result$wikicode

  return(language)
}
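A minimal usage sketch for this helper; the value shown in the comment is only an illustrative expectation, since the actual result depends on the live language-identification service:

# Detect the language of a short Dutch phrase
detected <- wikimedia_detect_language("Hallo, wereld")
detected
# expected to be a wikicode such as "nl", taken from the result$wikicode field above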
42 changes: 42 additions & 0 deletions R/wikipedia_get_language_names.R
@@ -0,0 +1,42 @@
#' Get language names
#'
#' This function sends a GET request to the Wikipedia API and returns the language names as a dataframe.
#'
#' @return A dataframe of language names.
#'
#' @examples
#' # Get language names
#' wikipedia_get_language_names()
#'
#' @export
wikipedia_get_language_names <- function() {
  # Define the URL of the API
  url <- "https://en.wikipedia.org/w/api.php?action=query&liprop=autonym|name&meta=languageinfo&uselang=en&format=json&origin=*"

  # Send the GET request and get the response
  response <- httr::GET(url)

  # Check if the request was successful
  if (httr::status_code(response) != 200) {
    stop("Request failed with status ", httr::status_code(response))
  }

  # Parse the response
  result <- httr::content(response, "parsed")

  # Extract the language names from the response
  language_names <- result$query$languageinfo

  # Get the keys of the language_names list
  language_tags <- names(language_names)

  # Combine the language_tags, names, and autonyms into a dataframe
  df <- data.frame(
    language_tag = language_tags,
    name = sapply(language_names, function(x) x$name),
    autonym = sapply(language_names, function(x) x$autonym),
    row.names = NULL
  )

  return(df)
}
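A minimal usage sketch; the rows come from the live Wikipedia languageinfo endpoint, so only the column layout (language_tag, name, autonym) is fixed by the code above:

# Fetch the table of language names and inspect its shape
languages <- wikipedia_get_language_names()
head(languages)
str(languages)  # data.frame with columns language_tag, name, autonym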
69 changes: 69 additions & 0 deletions R/wmcloud_translate.R
@@ -0,0 +1,69 @@
#' Translate text using WMCloud
#'
#' This function sends a POST request to the WMCloud translation API with the specified parameters,
#' parses the JSON response, and returns the translated text.
#'
#' @param text The text to translate.
#' @param target_language The target language for the translation (default is "en").
#' @param source_language The source language of the text (default is "en").
#' @param format The format of the content ("html", "json", "markdown", "text", "svg", "webpage").
#' @param model The model to use for the translation (only "nllb200-600M" is currently known to work).
#'
#' @return The translated text.
#' @export
wmcloud_translate <- function(text,
                              target_language = "en",
                              source_language = "en",
                              format = "text",
                              model = "nllb200-600M") {
  # Define the URL of the API
  url <- "https://translate.wmcloud.org/api/translate"

  # List of valid formats
  valid_formats <- c("html", "json", "markdown", "text", "svg", "webpage")

  # List of valid models
  valid_models <- c("nllb200-600M") # Add more models here

  # Check if format and model are valid
  if (!format %in% valid_formats) {
    stop(paste("Invalid format. Must be one of:", paste(valid_formats, collapse = ", ")))
  }
  if (!model %in% valid_models) {
    stop(paste("Invalid model. Must be one of:", paste(valid_models, collapse = ", ")))
  }

  # Create a list of parameters to send in the POST request
  body <- list(
    source_language = source_language,
    target_language = target_language,
    format = format,
    model = model,
    content = text
  )

  # Convert the list to a JSON string
  json_body <- jsonlite::toJSON(body, auto_unbox = TRUE)

  # Set the content type of the request to 'application/json'
  headers <- c("Content-Type" = "application/json")

  # Send the POST request and get the response
  response <- httr::POST(url, body = json_body, httr::add_headers(headers))

  # Check if the request was successful
  if (httr::status_code(response) != 200) {
    stop("Request failed with status ", httr::status_code(response))
  }

  # Parse the response
  result <- httr::content(response, "parsed")

  # Extract the translated text from the response
  translation <- result$translation

  return(translation)
}
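This help page ships without an @examples block, so here is a minimal usage sketch, assuming the WMCloud endpoint is reachable and accepts language codes such as "nl" for the target language:

# Translate a short English string to Dutch with the default model
wmcloud_translate(
  "Hello world",
  source_language = "en",
  target_language = "nl",
  format = "text",
  model = "nllb200-600M"
)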



2 changes: 2 additions & 0 deletions README.md
@@ -16,6 +16,8 @@ Currently, the package has functions to communicate with the following services:

- Linguee API

- Wikimedia Translation

# Features
To see which functions are available, please refer to the reference page of the `polyglotr` package. The [reference](https://Tomeriko96.github.io/polyglotr/reference/index.html) page provides a comprehensive list of functions available in the package, organized by category.

7 changes: 7 additions & 0 deletions _pkgdown.yml
@@ -28,3 +28,10 @@ reference:
  desc: Methods using MyMemory Translation services
  contents:
  - mymemory_translate

- title: Wikimedia Foundation Methods
  desc: Methods using Wikimedia Foundation Translation services
  contents:
  - wikimedia_detect_language
  - wikipedia_get_language_names
  - wmcloud_translate
23 changes: 23 additions & 0 deletions man/wikimedia_detect_language.Rd


19 changes: 19 additions & 0 deletions man/wikipedia_get_language_names.Rd


32 changes: 32 additions & 0 deletions man/wmcloud_translate.Rd

