From ac8b6595f762d04216f4b53e7216ac7cc6ccb838 Mon Sep 17 00:00:00 2001 From: orichters Date: Mon, 3 Feb 2025 10:46:20 +0100 Subject: [PATCH] add unitjoin to undo unitsplit --- .buildlibrary | 4 +++- .github/workflows/check.yaml | 25 ++++++++++++++-------- .pre-commit-config.yaml | 6 +++--- CITATION.cff | 4 ++-- DESCRIPTION | 4 ++-- NAMESPACE | 1 + R/unitjoin.R | 37 +++++++++++++++++++++++++++++++++ R/unitsplit.R | 2 +- README.md | 15 ++++++------- man/unitjoin.Rd | 25 ++++++++++++++++++++++ tests/testthat/test-unitsplit.R | 8 ++++++- 11 files changed, 105 insertions(+), 26 deletions(-) create mode 100644 R/unitjoin.R create mode 100644 man/unitjoin.Rd diff --git a/.buildlibrary b/.buildlibrary index 76d438ab..1e26e640 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '123025056' +ValidationKey: '124192984' AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' - 'Warning: namespace ''.*'' is not available and has been replaced' @@ -7,3 +7,5 @@ AcceptedNotes: unable to verify current time AutocreateReadme: yes allowLinterWarnings: no enforceVersionUpdate: no +AutocreateCITATION: yes +skipCoverage: no diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index f6ea5d40..54aa78bc 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -23,14 +23,14 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: | - any::lucode2 - any::covr - any::madrat - any::magclass - any::citation - any::gms - any::goxygen - any::GDPuc + lucode2 + covr + madrat + magclass + citation + gms + goxygen + GDPuc # piam packages also available on CRAN (madrat, magclass, citation, # gms, goxygen, GDPuc) will usually have an outdated binary version # available; by using extra-packages we get the newest version @@ -44,6 +44,13 @@ jobs: [ -f requirements.txt ] && python -m pip install --upgrade pip wheel || true [ -f requirements.txt ] && pip install -r requirements.txt || true + - name: Run pre-commit checks + shell: bash + run: | + python -m pip install pre-commit + python -m pip freeze --local + pre-commit run --show-diff-on-failure --color=always --all-files + - name: Verify validation key shell: Rscript {0} run: lucode2:::validkey(stopIfInvalid = TRUE) @@ -63,6 +70,6 @@ jobs: shell: Rscript {0} run: | nonDummyTests <- setdiff(list.files("./tests/testthat/"), c("test-dummy.R", "_snaps")) - if(length(nonDummyTests) > 0) covr::codecov(quiet = FALSE) + if(length(nonDummyTests) > 0 && !lucode2:::loadBuildLibraryConfig()[["skipCoverage"]]) covr::codecov(quiet = FALSE) env: NOT_CRAN: "true" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62f13da6..3edf90a5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ exclude: '^tests/testthat/_snaps/.*$' repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: 2c9f875913ee60ca25ce70243dc24d5b6415598c # frozen: v4.6.0 + rev: cef0300fd0fc4d2a87a85fa2093c6b283ea36f4b # frozen: v5.0.0 hooks: - id: check-case-conflict - id: check-json @@ -15,7 +15,7 @@ repos: - id: mixed-line-ending - repo: https://github.com/lorenzwalthert/precommit - rev: 7910e0323d7213f34275a7a562b9ef0fde8ce1b9 # frozen: v0.4.2 + rev: 3b70240796cdccbe1474b0176560281aaded97e6 # frozen: v0.4.3.9003 hooks: - id: parsable-R - id: deps-in-desc @@ -25,4 +25,4 @@ repos: - id: readme-rmd-rendered - id: use-tidy-description ci: - autoupdate_schedule: quarterly + autoupdate_schedule: weekly diff --git a/CITATION.cff b/CITATION.cff index 47b8dbc7..a59661c4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,8 +2,8 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'magclass: Data Class and Tools for Handling Spatial-Temporal Data' -version: 6.17.1 -date-released: '2024-08-01' +version: 6.17.2 +date-released: '2025-02-03' abstract: Data class for increased interoperability working with spatial-temporal data together with corresponding functions and methods (conversions, basic calculations and basic data manipulation). The class distinguishes between spatial, temporal diff --git a/DESCRIPTION b/DESCRIPTION index a445beed..c6242053 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: magclass Title: Data Class and Tools for Handling Spatial-Temporal Data -Version: 6.17.1 -Date: 2024-08-01 +Version: 6.17.2 +Date: 2025-02-03 Authors@R: c( person("Jan Philipp", "Dietrich", , "dietrich@pik-potsdam.de", comment = c(affiliation = "Potsdam Institute for Climate Impact Research", ORCID = "0000-0002-4309-6431"), role = c("aut", "cre")), diff --git a/NAMESPACE b/NAMESPACE index dcbff6a7..37a90bf9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,7 @@ export(setYears) export(spatRasterToDataset) export(suppressSpecificWarnings) export(time_interpolate) +export(unitjoin) export(unitsplit) export(unwrap) export(where) diff --git a/R/unitjoin.R b/R/unitjoin.R new file mode 100644 index 00000000..60dd96d3 --- /dev/null +++ b/R/unitjoin.R @@ -0,0 +1,37 @@ +#' joins a data.frame or vector of strings with variable and unit separated into +#' a data.frame with variable and unit joined as 'variable (unit)'. +#' Use magclass::unitsplit to split them again +#' +#' @param x data.frame or vector of strings +#' @param col column name. Default: variable +#' @param unit vector of strings. If NULL, col 'unit' in x is used +#' @return data.frame or vector of strings, dependent on x +#' +#' @export +unitjoin <- function(x, unit = NULL, col = "variable") { + # check whether is a data.frame + if (is.null(ncol(x))) { + if (length(x) == 0 || is.null(unit)) { + # return empty variable vector or if no unit specified + return(x) + } else { + # paste unit to it, keeping factor type + if (is.factor(x)) { + return(as.factor(paste0(x, " (", unit, ")"))) + } else { + return(paste0(x, " (", unit, ")")) + } + } + } else { # is data.frame + # no unit is specified -> take from unit column + stopifnot(col %in% colnames(x)) + if (is.null(unit)) { + x[col] <- unitjoin(x[[col]], x[["unit"]]) + x["unit"] <- NULL + } else { + # use what is specified + x[col] <- unitjoin(x[[col]], unit) + } + return(x) + } +} diff --git a/R/unitsplit.R b/R/unitsplit.R index f7411de1..10e7f867 100644 --- a/R/unitsplit.R +++ b/R/unitsplit.R @@ -37,7 +37,7 @@ unitsplit <- function(x, col = "variable") { varName <- sub(pattern, "\\1", x[[col]], perl = TRUE) unit <- sub(pattern, "\\3", x[[col]], perl = TRUE) unit[grep(pattern, x[[col]], invert = TRUE, perl = TRUE)] <- "N/A" - tmp <- data.frame(varName, unit, stringsAsFactors = FALSE) + tmp <- data.frame(varName, unit, stringsAsFactors = is.factor(x[[col]])) names(tmp) <- c(names(x[col]), "unit") x <- cbind(tmp, x[setdiff(names(x), names(x[col]))]) return(x) diff --git a/README.md b/README.md index 3f9372ba..f57fa939 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Data Class and Tools for Handling Spatial-Temporal Data -R package **magclass**, version **6.17.1** +R package **magclass**, version **6.17.2** [![CRAN status](https://www.r-pkg.org/badges/version/magclass)](https://cran.r-project.org/package=magclass) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1158580.svg)](https://doi.org/10.5281/zenodo.1158580) [![R build status](https://github.com/pik-piam/magclass/workflows/check/badge.svg)](https://github.com/pik-piam/magclass/actions) [![codecov](https://codecov.io/gh/pik-piam/magclass/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/magclass) [![r-universe](https://pik-piam.r-universe.dev/badges/magclass)](https://pik-piam.r-universe.dev/builds) @@ -56,17 +56,18 @@ In case of questions / problems please contact Jan Philipp Dietrich , R package version 6.17.1, . +Dietrich J, Bodirsky B, Bonsch M, Humpenoeder F, Bi S, Karstens K, Leip D, Sauer P, Baumstark L, Bertram C, Giannousakis A, Klein D, Neher I, Pehl M, Schultes A, Stevanovic M, Wang X, Beier F, Pflüger M, Richters O (2025). "magclass: Data Class and Tools for Handling Spatial-Temporal Data." doi:10.5281/zenodo.1158580 , Version: 6.17.2, . A BibTeX entry for LaTeX users is ```latex -@Manual{, +@Misc{, title = {magclass: Data Class and Tools for Handling Spatial-Temporal Data}, - author = {Jan Philipp Dietrich and Benjamin Leon Bodirsky and Markus Bonsch and Florian Humpenoeder and Stephen Bi and Kristine Karstens and Debbora Leip and Pascal Sauer}, - year = {2024}, - note = {R package version 6.17.1}, - url = {https://github.com/pik-piam/magclass}, + author = {Jan Philipp Dietrich and Benjamin Leon Bodirsky and Markus Bonsch and Florian Humpenoeder and Stephen Bi and Kristine Karstens and Debbora Leip and Pascal Sauer and Lavinia Baumstark and Christoph Bertram and Anastasis Giannousakis and David Klein and Ina Neher and Michaja Pehl and Anselm Schultes and Miodrag Stevanovic and Xiaoxi Wang and Felicitas Beier and Mika Pflüger and Oliver Richters}, doi = {10.5281/zenodo.1158580}, + date = {2025-02-03}, + year = {2025}, + url = {https://github.com/pik-piam/magclass}, + note = {Version: 6.17.2}, } ``` diff --git a/man/unitjoin.Rd b/man/unitjoin.Rd new file mode 100644 index 00000000..5a2f636d --- /dev/null +++ b/man/unitjoin.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/unitjoin.R +\name{unitjoin} +\alias{unitjoin} +\title{joins a data.frame or vector of strings with variable and unit separated into +a data.frame with variable and unit joined as 'variable (unit)'. +Use magclass::unitsplit to split them again} +\usage{ +unitjoin(x, unit = NULL, col = "variable") +} +\arguments{ +\item{x}{data.frame or vector of strings} + +\item{unit}{vector of strings. If NULL, col 'unit' in x is used} + +\item{col}{column name. Default: variable} +} +\value{ +data.frame or vector of strings, dependent on x +} +\description{ +joins a data.frame or vector of strings with variable and unit separated into +a data.frame with variable and unit joined as 'variable (unit)'. +Use magclass::unitsplit to split them again +} diff --git a/tests/testthat/test-unitsplit.R b/tests/testthat/test-unitsplit.R index e80613e6..9122b4bb 100644 --- a/tests/testthat/test-unitsplit.R +++ b/tests/testthat/test-unitsplit.R @@ -1,4 +1,4 @@ -test_that("unitsplit on vectors and strings works", { +test_that("unitsplit and unitjoin on vectors and strings works", { teststring <- "Emi|CO2|+|Energy (Mt CO2/yr)" expected <- data.frame( variable = "Emi|CO2|+|Energy", @@ -7,6 +7,7 @@ test_that("unitsplit on vectors and strings works", { expect_identical(unitsplit(teststring), expected) expect_identical(unitsplit(teststring, 1), expected) expect_identical(unitsplit(teststring, "variable"), expected) + expect_equal(unitjoin(expected)$variable, teststring) testvector <- c("Emi|CO2|+|Energy (Mt CO2/yr)", "Emi|CO|Land Use (Mt CO/yr)") expected <- data.frame( variable = c("Emi|CO2|+|Energy", "Emi|CO|Land Use"), @@ -15,6 +16,9 @@ test_that("unitsplit on vectors and strings works", { expect_identical(unitsplit(testvector), expected) expect_identical(unitsplit(testvector, 1), expected) expect_identical(unitsplit(testvector, "variable"), expected) + expect_equal(unitjoin(expected)$variable, testvector) + expect_identical(unitjoin(as.factor("V"), as.factor("U")), as.factor("V (U)")) + expect_identical(unitjoin("V", "U"), "V (U)") }) test_that("simple unitsplit works", { @@ -41,6 +45,8 @@ test_that("simple unitsplit works", { ) expect_identical(unitsplit(df, 4), expected) expect_identical(unitsplit(df, "Data"), expected) + expect_equal(unitjoin(expected, col = "Data")[colnames(df)], df) + expect_error(unitjoin(expected, col = "doesnotexist")) }) test_that("unitsplit works with braces", {