From d114c26c6eb4bf37fe8c5d6dc7169039de9c5d43 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Sun, 20 Oct 2024 18:54:07 +0200 Subject: [PATCH 01/19] fix: hydro imgw daily --- DESCRIPTION | 2 +- NEWS.md | 5 + R/hydro_imgw_daily.R | 134 +++++++++++++++---------- tests/testthat/test-meteo_imgw_daily.R | 2 +- 4 files changed, 90 insertions(+), 53 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f1182a8..f857ca1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: climate Title: Interface to Download Meteorological (and Hydrological) Datasets -Version: 1.2.1 +Version: 1.2.2 Authors@R: c(person(given = "Bartosz", family = "Czernecki", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index d32dc4b..33a1568 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# climate 1.2.3 + +* Major fixes for adjusting code to recognize different encoding and directory structure for (IMGW) hydrological datasets + + # climate 1.2.2 * Major fixes for adjusting code to stay in line with CRAN policies diff --git a/R/hydro_imgw_daily.R b/R/hydro_imgw_daily.R index 3b0d13d..2dadd67 100644 --- a/R/hydro_imgw_daily.R +++ b/R/hydro_imgw_daily.R @@ -59,6 +59,11 @@ hydro_imgw_daily_bp = function(year, base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/" interval = "daily" interval_pl = "dobowe" + + # initiate empty objects: + all_data = NULL + codz_data = NULL + zjaw_data = NULL temp = tempfile() test_url(link = paste0(base_url, interval_pl, "/"), output = temp) @@ -68,63 +73,89 @@ hydro_imgw_daily_bp = function(year, catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind]) catalogs = gsub(x = catalogs, pattern = "/", replacement = "") catalogs = catalogs[catalogs %in% as.character(year)] + if (length(catalogs) == 0) { stop("Selected year(s) is/are not available in the database.", call. 
= FALSE) } meta = hydro_metadata_imgw(interval) - all_data = vector("list", length = length(catalogs)) for (i in seq_along(catalogs)) { catalog = catalogs[i] - iterator = c("01", "02", "03", "04", "05", "06", - "07", "08", "09", "10", "11", "12") - data = NULL + + temp = tempfile() + test_url(link = paste0(base_url, interval_pl, "/", catalog), output = temp) + b = readLines(temp, warn = FALSE) + + files_in_dir = readHTMLTable(b)[[1]]$Name + ind = grep(files_in_dir, pattern = "zip") + codz_files = grep(x = files_in_dir, pattern = "codz", value = TRUE) + zjaw_files = grep(x = files_in_dir, pattern = "zjaw", value = TRUE) + iterator = c(codz_files, zjaw_files) + for (j in seq_along(iterator)) { - address = paste0(base_url, interval_pl, "/", catalog, "/codz_", catalog, "_", iterator[j], ".zip") - temp = tempfile() - temp2 = tempfile() - test_url(address, temp) - #download.file(address, temp) - unzip(zipfile = temp, exdir = temp2) - file1 = paste(temp2, dir(temp2), sep = "/")[1] + + # file pattern for codz: + if (grepl(x = iterator[j], "codz")) { + address = paste0(base_url, interval_pl, "/", catalog, "/", iterator[j]) + temp = tempfile() + temp2 = tempfile() + test_url(link = address, output = temp) + unzip(zipfile = temp, exdir = temp2) + file1 = paste(temp2, dir(temp2), sep = "/")[1] + + if (translit) { + data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) + } else { + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", + fileEncoding = "CP1250"), + warning = function(w) { + read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + } + # extra exception for a current year according to information provided by IMGW-PIB:, i.e.: + # "Do czasu zakonczenia kontroli przeplywow rekordy z danymi z roku 2020 maja format: + # Kod stacji #Nazwa stacji #Nazwa rzeki/jeziora #Rok hydrologiczny #Wskaznik miesiaca w roku hydrologicznym + # Dzien #Stan wody [cm] #Temperatura wody [st. C] #Miesiac kalendarzowy + if (ncol(data1) == 9) { + data1$flow = NA + data1 = data1[, c(1:7, 10, 8:9)] + } - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } - # extra exception for a current year according to information provided by IMGW-PIB: - # i.e.: - # "Do czasu zakonczenia kontroli przeplywow rekordy z danymi z roku 2020 maja format: - #Kod stacji #Nazwa stacji #Nazwa rzeki/jeziora #Rok hydrologiczny #Wskaznik miesiaca w roku hydrologicznym - #Dzien #Stan wody [cm] #Temperatura wody [st. 
C] #Miesiac kalendarzowy - if (ncol(data1) == 9) { - data1$flow = NA - data1 = data1[, c(1:7, 10, 8:9)] + colnames(data1) = meta[[1]][, 1] + codz_data = rbind(codz_data, data1) + } # end of codz_ + + + # start of zjaw_ section: + if (grepl(x = iterator[j], "zjaw")) { + address = paste0(base_url, interval_pl, "/", catalog, "/", iterator[j]) + temp = tempfile() + temp2 = tempfile() + test_url(address, temp) + unzip(zipfile = temp, exdir = temp2) + file2 = paste(temp2, dir(temp2), sep = "/")[1] + + if (translit) { + data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) + } else { + data2 = tryCatch(expr = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ",", + fileEncoding = "CP1250"), + warning = function(w) { + read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + } + + colnames(data2) = meta[[2]][, 1] + zjaw_data = rbind(zjaw_data, data2) } - - colnames(data1) = meta[[1]][, 1] - data = rbind(data, data1) - } - address = paste0(base_url, interval_pl, "/", catalog, "/zjaw_", catalog, ".zip") - - temp = tempfile() - temp2 = tempfile() - test_url(address, temp) - unzip(zipfile = temp, exdir = temp2) - file2 = paste(temp2, dir(temp2), sep = "/")[1] - - if (translit) { - data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2))) - } else { - data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } - - colnames(data2) = meta[[2]][, 1] - all_data[[i]] = merge(data, data2, - by = intersect(colnames(data), colnames(data2)), - all.x = TRUE) - } + + } #end of loop for (usually monthly) zip files in a given year + + all_data[[length(all_data) + 1]] = merge(codz_data, zjaw_data, + by = intersect(colnames(codz_data), colnames(zjaw_data)), + all.x = TRUE) + + } # end of loop for years (if more than 1 specified) all_data = do.call(rbind, all_data) all_data[all_data == 9999] = NA @@ -138,6 +169,7 @@ hydro_imgw_daily_bp = function(year, by.y = "Kod stacji", all.y = TRUE) } + #station selection if (!is.null(station)) { if (is.character(station)) { @@ -145,7 +177,7 @@ hydro_imgw_daily_bp = function(year, if (nrow(all_data) == 0) { stop("Selected station(s) is not available in the database.", call. = FALSE) } - } else if (is.numeric(station)) { + } else if (is.numeric(station)) { all_data = all_data[all_data$`Kod stacji` %in% station, ] if (nrow(all_data) == 0) { stop("Selected station(s) is not available in the database.", call. = FALSE) @@ -154,9 +186,9 @@ hydro_imgw_daily_bp = function(year, stop("Selected station(s) are not in the proper format.", call. = FALSE) } } - + all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydro|w roku hydro|Dzie")]), ] all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...) - + return(all_data) -} +} \ No newline at end of file diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index 1c1ead0..416b06c 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -6,7 +6,7 @@ test_that("meteo_imgw_daily", { message("No internet connection! 
\n") return(invisible(NULL)) } else { - y <- 1900 # year not supported + y = 1900 # year not supported expect_message(meteo_imgw_daily(rank = "synop", year = y, status = TRUE, coords = TRUE, allow_failure = TRUE)) } From 572811ac00711c5d9d6aa4ca7ea9f9e9e1143634 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Sun, 20 Oct 2024 19:23:59 +0200 Subject: [PATCH 02/19] fix: hydro-imgw --- R/hydro_imgw_annual.R | 8 ++++++-- R/hydro_imgw_monthly.R | 10 +++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/R/hydro_imgw_annual.R b/R/hydro_imgw_annual.R index 5557792..977c2a0 100644 --- a/R/hydro_imgw_annual.R +++ b/R/hydro_imgw_annual.R @@ -93,9 +93,13 @@ hydro_imgw_annual_bp = function(year = year, file1 = paste(temp2, dir(temp2), sep = "/")[1] if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) + data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", + fileEncoding = "CP1250"), + warning = function(w) { + read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) } colnames(data1) = meta[[value]]$parameters diff --git a/R/hydro_imgw_monthly.R b/R/hydro_imgw_monthly.R index cacb829..82a3466 100644 --- a/R/hydro_imgw_monthly.R +++ b/R/hydro_imgw_monthly.R @@ -75,9 +75,9 @@ hydro_imgw_monthly_bp = function(year, meta = hydro_metadata_imgw(interval) all_data = vector("list", length = length(catalogs)) + for (i in seq_along(catalogs)) { catalog = catalogs[i] - adres = paste0(base_url, interval_pl, "/", catalog, "/mies_", catalog, ".zip") temp = tempfile() @@ -87,9 +87,13 @@ hydro_imgw_monthly_bp = function(year, file1 = paste(temp2, dir(temp2), sep = "/")[1] if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) + data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", + fileEncoding = "CP1250"), + warning = function(w) { + read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) } colnames(data1) = meta[[1]][, 1] From 95b3d492f5ebf15816ae7a6dbbba70661bb63c9b Mon Sep 17 00:00:00 2001 From: bczernecki Date: Sun, 20 Oct 2024 20:13:35 +0200 Subject: [PATCH 03/19] fix: unit tests for ogimet datasets --- NEWS.md | 13 +++++-------- R/ogimet_daily.R | 8 ++++++-- tests/testthat/test-meteo_ogimet.R | 8 ++++---- tests/testthat/test-nearest_stations_ogimet.R | 10 +++++----- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/NEWS.md b/NEWS.md index 33a1568..2bb8e67 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,16 +1,13 @@ -# climate 1.2.3 - -* Major fixes for adjusting code to recognize different encoding and directory structure for (IMGW) hydrological datasets - - # climate 1.2.2 -* Major fixes for adjusting code to stay in line with CRAN policies -* Fixes for `hydro_imgw()` set of functions due to changes in encoding and metadata structure - +* Fixes for `hydro_imgw()` set of functions due to changes in the IMGW-PIB hydrological datasets + * adjusting code to recognize different encoding and directory structure + * adjusting changes in metadata +* 
Fix unit tests for ogimet-related datasets # climate 1.2.1 +* Major fixes for adjusting code to stay in line with CRAN policies * Corrected duplicated column names for IMGW-PIB stations * Adjusted encoding changes and documentation updates in `meteo_imgw_telemetry_stations()` diff --git a/R/ogimet_daily.R b/R/ogimet_daily.R index aaaf6f1..0b0c305 100644 --- a/R/ogimet_daily.R +++ b/R/ogimet_daily.R @@ -94,7 +94,10 @@ ogimet_daily_bp = function(date = date, day = format(dates[i], "%d") ndays = day - linkpl2 = paste("https://www.ogimet.com/cgi-bin/gsynres?lang=en&ind=", station_nr, "&ndays=32&ano=", year, "&mes=", month, "&day=", day, "&hora=", hour,"&ord=REV&Send=Send", sep = "") + linkpl2 = paste("https://www.ogimet.com/cgi-bin/gsynres?lang=en&ind=", + station_nr, "&ndays=32&ano=", + year, "&mes=", month, "&day=", day, + "&hora=", hour,"&ord=REV&Send=Send", sep = "") temp = tempfile() test_url(linkpl2, temp) if (is.na(file.size(temp)) | (file.size(temp) < 500)) { @@ -137,8 +140,9 @@ ogimet_daily_bp = function(date = date, # number of columns contain weird/non-standard data (e.g. only wind speed) if (ncol(test) <= 4) { - stop(paste0("Mandatory meteorological parameters (i.e. Temperature or precipitations) are not present. \nCheck content of the data using current URL:\n", + message(paste0("Mandatory meteorological parameters (i.e. Temperature or precipitations) are not present. \nCheck content of the data using current URL:\n", linkpl2)) + return(test) } if ((length(test[2, !is.na(test[2, ])]) == 6 & diff --git a/tests/testthat/test-meteo_ogimet.R b/tests/testthat/test-meteo_ogimet.R index 59edaaa..0a9b5c7 100644 --- a/tests/testthat/test-meteo_ogimet.R +++ b/tests/testthat/test-meteo_ogimet.R @@ -5,7 +5,7 @@ test_that("meteo_ogimet works!", { station = c(12330, 12375), coords = TRUE) # sometimes ogimet requires warm spin-up, so in order to pass CRAN tests: - if (is.data.frame(df) & nrow(df) > 0) { + if (is.data.frame(df) & nrow(df) > 15) { expect_true(any(colnames(df) %in% c("Lon", "Lat"))) } @@ -19,7 +19,7 @@ test_that("meteo_ogimet works!", { x = meteo_ogimet(interval = "hourly", date = c("2019-06-01", "2019-06-08"), station = c(12330), coords = TRUE) - if (is.data.frame(x) & nrow(df) > 0) { + if (is.data.frame(x) & nrow(df) > 20) { testthat::expect_true(nrow(x) > 100) } @@ -27,7 +27,7 @@ test_that("meteo_ogimet works!", { x = meteo_ogimet(interval = "hourly", date = c("2019-01-01", "2019-01-05"), station = 12120, coords = FALSE) - if (is.data.frame(x) & nrow(x) > 0) { + if (is.data.frame(x) & nrow(x) > 20) { testthat::expect_equal(unique(format(x$Date, "%Y")), "2019") } @@ -45,7 +45,7 @@ test_that("meteo_ogimet works!", { message("No internet connection! 
\n") return(invisible(NULL)) } else { - testthat::expect_error( + testthat::expect_message( meteo_ogimet( date = c(as.Date("2020-02-01"), Sys.Date() - 1), # date = c(Sys.Date() - 7, Sys.Date() - 1), diff --git a/tests/testthat/test-nearest_stations_ogimet.R b/tests/testthat/test-nearest_stations_ogimet.R index ac9c5aa..470dc42 100644 --- a/tests/testthat/test-nearest_stations_ogimet.R +++ b/tests/testthat/test-nearest_stations_ogimet.R @@ -2,15 +2,15 @@ context("meteo_imgw") test_that("nearest_stations_ogimet works!", { - x <- nearest_stations_ogimet(country = "United+Kingdom", point = c(-10, -50), add_map = TRUE, no_of_stations = 10) + x <- nearest_stations_ogimet(country = "United Kingdom", point = c(-10, -50), add_map = TRUE, no_of_stations = 10) - if (is.data.frame(x)) { + if (is.data.frame(x) && ncol(x) > 5) { testthat::expect_equal(nrow(x), 10) } x <- nearest_stations_ogimet(country = "Poland", point = c(10, 50), add_map = TRUE, no_of_stations = 10) - if (is.data.frame(x)) { + if (is.data.frame(x) && ncol(x) > 5) { testthat::expect_equal(nrow(x), 10) } @@ -21,8 +21,8 @@ test_that("nearest_stations_ogimet works!", { # allow_failure = FALSE, # no_of_stations = 10)) - x <- nearest_stations_ogimet(country = c("United+Kingdom", "Poland"), point = c(0, 0), add_map = TRUE, no_of_stations = 150) - if (is.data.frame(x)) { + x <- nearest_stations_ogimet(country = c("United Kingdom", "Poland"), point = c(0, 0), add_map = TRUE, no_of_stations = 150) + if (is.data.frame(x) && ncol(x) > 5) { expect_true(mean(x$distance) > 5000) } From 6edd6bd7dd4ccb9f5d006977f40eb0b322962988 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Mon, 21 Oct 2024 13:38:55 +0200 Subject: [PATCH 04/19] fix: hydro imgw --- R/hydro_imgw_annual.R | 7 +++++ R/hydro_imgw_daily.R | 46 ++++++++++++++++++++------------ R/hydro_imgw_monthly.R | 7 +++++ tests/testthat/test-hydro_imgw.R | 11 ++++++++ 4 files changed, 54 insertions(+), 17 deletions(-) diff --git a/R/hydro_imgw_annual.R b/R/hydro_imgw_annual.R index 977c2a0..ed12721 100644 --- a/R/hydro_imgw_annual.R +++ b/R/hydro_imgw_annual.R @@ -100,6 +100,13 @@ hydro_imgw_annual_bp = function(year = year, warning = function(w) { read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") }) + if (ncol(data1) == 1) { + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";", + fileEncoding = "UTF-8"), + warning = function(w) { + read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + } } colnames(data1) = meta[[value]]$parameters diff --git a/R/hydro_imgw_daily.R b/R/hydro_imgw_daily.R index 2dadd67..cdf8203 100644 --- a/R/hydro_imgw_daily.R +++ b/R/hydro_imgw_daily.R @@ -64,11 +64,11 @@ hydro_imgw_daily_bp = function(year, all_data = NULL codz_data = NULL zjaw_data = NULL - + temp = tempfile() test_url(link = paste0(base_url, interval_pl, "/"), output = temp) a = readLines(temp, warn = FALSE) - + ind = grep(readHTMLTable(a)[[1]]$Name, pattern = "/") catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind]) catalogs = gsub(x = catalogs, pattern = "/", replacement = "") @@ -78,7 +78,7 @@ hydro_imgw_daily_bp = function(year, stop("Selected year(s) is/are not available in the database.", call. 
= FALSE) } meta = hydro_metadata_imgw(interval) - + for (i in seq_along(catalogs)) { catalog = catalogs[i] @@ -105,13 +105,20 @@ hydro_imgw_daily_bp = function(year, if (translit) { data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) - } else { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", - fileEncoding = "CP1250"), + } else { + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", + fileEncoding = "CP1250"), + warning = function(w) { + read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + if (ncol(data1) == 1) { + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";", + fileEncoding = "UTF-8"), warning = function(w) { read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") }) } + } # extra exception for a current year according to information provided by IMGW-PIB:, i.e.: # "Do czasu zakonczenia kontroli przeplywow rekordy z danymi z roku 2020 maja format: # Kod stacji #Nazwa stacji #Nazwa rzeki/jeziora #Rok hydrologiczny #Wskaznik miesiaca w roku hydrologicznym @@ -120,12 +127,12 @@ hydro_imgw_daily_bp = function(year, data1$flow = NA data1 = data1[, c(1:7, 10, 8:9)] } - + colnames(data1) = meta[[1]][, 1] codz_data = rbind(codz_data, data1) } # end of codz_ - + # start of zjaw_ section: if (grepl(x = iterator[j], "zjaw")) { address = paste0(base_url, interval_pl, "/", catalog, "/", iterator[j]) @@ -134,17 +141,22 @@ hydro_imgw_daily_bp = function(year, test_url(address, temp) unzip(zipfile = temp, exdir = temp2) file2 = paste(temp2, dir(temp2), sep = "/")[1] - if (translit) { - data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) + data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file2))) } else { data2 = tryCatch(expr = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ",", fileEncoding = "CP1250"), warning = function(w) { read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";") }) + if (ncol(data2) == 1) { + data2 = tryCatch(expr = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";", + fileEncoding = "UTF-8"), + warning = function(w) { + read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + } } - colnames(data2) = meta[[2]][, 1] zjaw_data = rbind(zjaw_data, data2) } @@ -152,17 +164,17 @@ hydro_imgw_daily_bp = function(year, } #end of loop for (usually monthly) zip files in a given year all_data[[length(all_data) + 1]] = merge(codz_data, zjaw_data, - by = intersect(colnames(codz_data), colnames(zjaw_data)), - all.x = TRUE) + by = intersect(colnames(codz_data), colnames(zjaw_data)), + all.x = TRUE) } # end of loop for years (if more than 1 specified) - + all_data = do.call(rbind, all_data) all_data[all_data == 9999] = NA all_data[all_data == 99999.999] = NA all_data[all_data == 99.9] = NA all_data[all_data == 999] = NA - + if (coords) { all_data = merge(climate::imgw_hydro_stations, all_data, by.x = "id", @@ -177,7 +189,7 @@ hydro_imgw_daily_bp = function(year, if (nrow(all_data) == 0) { stop("Selected station(s) is not available in the database.", call. = FALSE) } - } else if (is.numeric(station)) { + } else if (is.numeric(station)) { all_data = all_data[all_data$`Kod stacji` %in% station, ] if (nrow(all_data) == 0) { stop("Selected station(s) is not available in the database.", call. 
= FALSE) @@ -189,6 +201,6 @@ hydro_imgw_daily_bp = function(year, all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydro|w roku hydro|Dzie")]), ] all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...) - + return(all_data) } \ No newline at end of file diff --git a/R/hydro_imgw_monthly.R b/R/hydro_imgw_monthly.R index 82a3466..7b87a2e 100644 --- a/R/hydro_imgw_monthly.R +++ b/R/hydro_imgw_monthly.R @@ -94,6 +94,13 @@ hydro_imgw_monthly_bp = function(year, warning = function(w) { read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") }) + if (ncol(data1) == 1) { + data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";", + fileEncoding = "UTF-8"), + warning = function(w) { + read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + } } colnames(data1) = meta[[1]][, 1] diff --git a/tests/testthat/test-hydro_imgw.R b/tests/testthat/test-hydro_imgw.R index 377af01..223c859 100644 --- a/tests/testthat/test-hydro_imgw.R +++ b/tests/testthat/test-hydro_imgw.R @@ -15,6 +15,17 @@ test_that("hydro_imgw_not_available", { expect_error(suppressWarnings(hydro_imgw(interval = "monthly", year = 1960, coord = TRUE, station = 999, allow_failure = FALSE))) + h2022_2023 = hydro_imgw(interval = "monthly", + year = 2022:2023, + coord = TRUE, + allow_failure = FALSE) + + if (is.data.frame(h2022_2023) & nrow(h2022_2023 > 50000)) { + testthat::expect_true(is.data.frame(h2022_2023)) + testthat::expect_true(nrow(h2022_2023) > 50000) + } + + expect_error(suppressWarnings(hydro_imgw(interval = "semiannual_and_annual", year = 1960, coord = TRUE, station = "not available", allow_failure = FALSE))) From 9ac70757c4539c325d99f2874f69321f9737dddb Mon Sep 17 00:00:00 2001 From: bczernecki Date: Mon, 21 Oct 2024 14:17:30 +0200 Subject: [PATCH 05/19] fix: remove hydro semiannual and annual --- NAMESPACE | 1 - NEWS.md | 3 +- R/clean_metadata_hydro.R | 17 --- R/hydro_imgw.R | 14 +-- R/hydro_imgw_annual.R | 146 ---------------------- R/hydro_metadata_imgw.R | 8 +- R/nearest_stations_imgw.R | 2 +- README.md | 18 +-- man/hydro_imgw.Rd | 3 +- man/hydro_imgw_annual.Rd | 47 ------- tests/testthat/test-hydro_imgw.R | 13 +- tests/testthat/test-hydro_metadata_imgw.R | 4 - vignettes/getstarted.Rmd | 2 +- 13 files changed, 27 insertions(+), 251 deletions(-) delete mode 100644 R/hydro_imgw_annual.R delete mode 100644 man/hydro_imgw_annual.Rd diff --git a/NAMESPACE b/NAMESPACE index a9cb557..4c7b824 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,6 @@ export(.onAttach) export(hydro_imgw) -export(hydro_imgw_annual) export(hydro_imgw_daily) export(hydro_imgw_monthly) export(hydro_shortening_imgw) diff --git a/NEWS.md b/NEWS.md index 2bb8e67..968ab7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ # climate 1.2.2 -* Fixes for `hydro_imgw()` set of functions due to changes in the IMGW-PIB hydrological datasets +* Fixes and modifications for `hydro_imgw()` set of functions due to changes in the IMGW-PIB hydrological datasets * adjusting code to recognize different encoding and directory structure * adjusting changes in metadata + * removed option to download data for "semiannual and annual" time resolutions due to inconsistencies in the data * Fix unit tests for ogimet-related datasets # climate 1.2.1 diff --git a/R/clean_metadata_hydro.R b/R/clean_metadata_hydro.R index dda531c..a5dc1d6 100644 --- a/R/clean_metadata_hydro.R +++ b/R/clean_metadata_hydro.R @@ -24,22 +24,5 @@ clean_metadata_hydro = function(address, 
interval) { if (interval == "daily") { b = data.frame(parameters = a[1:10]) } - if (interval == "semiannual_and_annual") { - godzina = paste0(a[13], ":", a[14]) - data = c(a[10:12], godzina) - data_od = paste0("wystapienie_od_", data) - data_do = paste0("wystapienie_do_", data) - SPT = unlist(strsplit(a[8], "]/")) # stan/przeplyw/temperatura - SPT[1] = paste0(SPT[1], "]") - SPT[2] = paste0(SPT[2], "]") - b = NULL - for (i in seq_along(SPT)) { - tmp = c(a[1:7], SPT[i], data_od, data_do) - b = cbind(b, tmp) - } - b = list("H" = data.frame(parameters = b[, 1]), - "Q" = data.frame(parameters = b[, 2]), - "T" = data.frame(parameters = b[, 3])) - } return(b) } diff --git a/R/hydro_imgw.R b/R/hydro_imgw.R index 5cc0ffb..f03bf4c 100644 --- a/R/hydro_imgw.R +++ b/R/hydro_imgw.R @@ -3,8 +3,7 @@ #' Downloading daily, and monthly hydrological data from the measurement stations #' available in the danepubliczne.imgw.pl collection #' -#' @param interval temporal resolution of the data ("daily" , "monthly", -#' or "semiannual_and_annual") +#' @param interval temporal resolution of the data ("daily" or "monthly") #' @param year vector of years (e.g., 1966:2000) #' @param coords add coordinates of the stations (logical value TRUE or FALSE) #' @param value type of data (can be: state - "H" (default), flow - "Q", or @@ -38,20 +37,13 @@ hydro_imgw = function(interval, # dobowe calosc = hydro_imgw_daily(year = year, coords = coords, station = station, col_names = col_names, ...) } else if (interval == "monthly") { - #miesieczne + # miesieczne calosc = hydro_imgw_monthly(year = year, coords = coords, station = station, col_names = col_names, ...) - } else if (interval == "semiannual_and_annual") { - # polroczne_i_roczne - calosc = hydro_imgw_annual(year = year, - coords = coords, - value = value, - station = station, - col_names = col_names, ...) } else{ - stop("Wrong `interval` value. It should be either 'daily', 'monthly', or 'semiannual_and_annual'.", call. = FALSE) + stop("Wrong `interval` value. It should be either 'daily' or 'monthly'", call. = FALSE) } return(calosc) } diff --git a/R/hydro_imgw_annual.R b/R/hydro_imgw_annual.R deleted file mode 100644 index ed12721..0000000 --- a/R/hydro_imgw_annual.R +++ /dev/null @@ -1,146 +0,0 @@ -#' Semi-annual and annual hydrological data -#' -#' Downloading hydrological data for the semi-annual and annual period -#' available in the danepubliczne.imgw.pl collection -#' -#' @param year vector of years (e.g., 1966:2000) -#' @param coords add coordinates of the stations (logical value TRUE or FALSE) -#' @param value type of data (can be: state - "H" (default), flow - "Q", or temperature - "T") -#' @param station name or ID of hydrological station(s). -#' It accepts names (characters in CAPITAL LETTERS) or stations' IDs (numeric) -#' @param col_names three types of column names possible: -#' "short" - default, values with shorten names, -#' "full" - full English description, -#' "polish" - original names in the dataset -#' @param allow_failure logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE -#' @param ... 
other parameters that may be passed to the 'shortening' function that shortens column names -#' @importFrom XML readHTMLTable -#' @importFrom utils download.file unzip read.csv -#' @importFrom data.table fread -#' @export -#' @returns data.frame with historical hydrological data for the semi-annual and annual period -#' @examples -#' \donttest{ -#' hydro_yearly = hydro_imgw_annual(year = 2000, value = "H", station = "ANNOPOL") -#' } -hydro_imgw_annual = function(year, - coords = FALSE, - value = "H", - station = NULL, - col_names = "short", - allow_failure = TRUE, - ...) { - - if (allow_failure) { - tryCatch(hydro_imgw_annual_bp(year, - coords, - value, - station, - col_names, - ...), - error = function(e){ - message(paste("Problems with downloading data.", - "Run function with argument allow_failure = FALSE", - "to see more details"))}) - } else { - hydro_imgw_annual_bp(year, - coords, - value, - station, - col_names, - ...) - } -} - -#' @keywords internal -#' @noRd -hydro_imgw_annual_bp = function(year = year, - coords = coords, - value = value, - station = station, - col_names = col_names, - ...) { - - translit = check_locale() - base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_hydrologiczne/" - interval = "semiannual_and_annual" - interval_pl = "polroczne_i_roczne" - - temp = tempfile() - test_url(link = paste0(base_url, interval_pl, "/"), output = temp) - a = readLines(temp, warn = FALSE) - - ind = grep(readHTMLTable(a)[[1]]$Name, pattern = "/") - catalogs = as.character(readHTMLTable(a)[[1]]$Name[ind]) - catalogs = gsub(x = catalogs, pattern = "/", replacement = "") - - catalogs = catalogs[catalogs %in% as.character(year)] - if (length(catalogs) == 0) { - stop("Selected year(s) is/are not available in the database.", call. = FALSE) - } - meta = hydro_metadata_imgw(interval) - - all_data = vector("list", length = length(catalogs)) - for (i in seq_along(catalogs)) { - catalog = catalogs[i] - address = paste0(base_url, interval_pl, "/", catalog, "/polr_", value, "_", catalog, ".zip") - - temp = tempfile() - temp2 = tempfile() - test_url(address, temp) - #download.file(address, temp) - unzip(zipfile = temp, exdir = temp2) - file1 = paste(temp2, dir(temp2), sep = "/")[1] - - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) - } else { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", - fileEncoding = "CP1250"), - warning = function(w) { - read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - if (ncol(data1) == 1) { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";", - fileEncoding = "UTF-8"), - warning = function(w) { - read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - } - } - - colnames(data1) = meta[[value]]$parameters - all_data[[i]] = data1 - } - all_data = do.call(rbind, all_data) - all_data[all_data == 99999.999] = NA - all_data = all_data[, !duplicated(colnames(all_data))] - - # coords - if (coords) { - all_data = merge(climate::imgw_hydro_stations, all_data, by.x = "id", by.y = "Nazwa rzeki/jeziora", all.y = TRUE) - } - #station selection - if (!is.null(station)) { - if (is.character(station)) { - all_data = all_data[substr(all_data$`Nazwa stacji`, 1, nchar(station)) == station, ] - if (nrow(all_data) == 0) { - - stop("Selected station(s) is not available in the database.", call. 
= FALSE) - } - } else if (is.numeric(station)) { - all_data = all_data[all_data$`Kod stacji` %in% station, ] - if (nrow(all_data) == 0) { - stop("Selected station(s) is not available in the database.", call. = FALSE) - } - } else { - stop("Selected station(s) are not in the proper format.", call. = FALSE) - } - } - - all_data = all_data[order(all_data$`Nazwa stacji`, all_data$`Rok hydrologiczny`), ] - # adding option for shortening column names and removing duplicates - all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...) - - return(all_data) -} diff --git a/R/hydro_metadata_imgw.R b/R/hydro_metadata_imgw.R index 2002819..fcd5419 100644 --- a/R/hydro_metadata_imgw.R +++ b/R/hydro_metadata_imgw.R @@ -3,7 +3,7 @@ #' Downloading the description (metadata) to hydrological data available in the danepubliczne.imgw.pl repository. #' By default, the function returns a list or data frame for a selected subset #` -#' @param interval temporal resolution of the data ("daily" , "monthly", or "semiannual_and_annual") +#' @param interval temporal resolution of the data ("daily" or "monthly") #' @param allow_failure logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE #' @keywords internal #' @noRd @@ -37,12 +37,8 @@ hydro_metadata_imgw_bp = function(interval) { #miesieczne address_meta = paste0(base_url, "miesieczne/mies_info.txt") meta = clean_metadata_hydro(address_meta, interval) - } else if (interval == "semiannual_and_annual") { - # polroczne_i_roczne - address_meta = paste0(base_url, "polroczne_i_roczne/polr_info.txt") - meta = clean_metadata_hydro(address_meta, interval) } else { - stop("Wrong `interval` value. It should be either 'daily', 'monthly', or 'semiannual_and_annual'.") + stop("Wrong `interval` value. It should be either 'daily' or 'monthly'.") } return(meta) diff --git a/R/nearest_stations_imgw.R b/R/nearest_stations_imgw.R index d6af31f..741860f 100644 --- a/R/nearest_stations_imgw.R +++ b/R/nearest_stations_imgw.R @@ -97,7 +97,7 @@ nearest_stations_imgw_bp = function(type, if (type == "meteo") { result = unique(meteo_imgw_monthly(rank = rank, year = year, coords = TRUE)[, c(2:5)]) } else if (type == "hydro") { - result = unique(hydro_imgw_annual(year = year, coords = TRUE)[, c(1:4)]) + result = unique(hydro_imgw_monthly(year = year, coords = TRUE)[, c(1:4)]) } else { stop("You've provided wrong type argument; please use: \"meteo\", or \"hydro\"") } diff --git a/README.md b/README.md index 2a0100f..e323b52 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It is a wrapper for `meteo_monthly()`, `meteo_daily()`, and `meteo_hourly()` - **hydro_imgw()** - Downloading hourly, daily, and monthly hydrological data from the SYNOP / CLIMATE / PRECIP stations available in the danepubliczne.imgw.pl collection. 
-It is a wrapper for previously developed set of functions such as: `hydro_annual()`, `hydro_monthly()`, and `hydro_daily()` +It is a wrapper for previously developed set of functions such as: `hydro_monthly()`, and `hydro_daily()` ### Auxiliary functions and datasets @@ -166,15 +166,15 @@ head(m) #> 580 21.3 -4.3 5.7 13.8 -8.3 9.4 #> 581 23.1 1.0 9.6 16.6 -1.8 36.4 -h = hydro_imgw(interval = "semiannual_and_annual", year = 2010:2011) +h = hydro_imgw(interval = "daily", year = 2010:2011) head(h) - id station riv_or_lake hyy idyy Mesu idex H beyy bemm bedd behm -3223 150210180 ANNOPOL Wisła (2) 2010 13 H 1 227 2009 12 19 NA -3224 150210180 ANNOPOL Wisła (2) 2010 13 H 2 319 NA NA NA NA -3225 150210180 ANNOPOL Wisła (2) 2010 13 H 3 531 2010 3 3 18 -3226 150210180 ANNOPOL Wisła (2) 2010 14 H 1 271 2010 8 29 NA -3227 150210180 ANNOPOL Wisła (2) 2010 14 H 1 271 2010 10 27 NA -3228 150210180 ANNOPOL Wisła (2) 2010 14 H 2 392 NA NA NA NA + id station riv_or_lake hyy idhyy dd H Q T mm thick id_ice p_ice +97843 150210180 ANNOPOL Wisła (2) 2010 1 1 287 436 NA 11 NA NA NA +507527 150210180 ANNOPOL Wisła (2) 2010 1 1 287 436 NA 11 NA NA NA +97844 150210180 ANNOPOL Wisła (2) 2010 1 2 282 412 NA 11 NA NA NA +507528 150210180 ANNOPOL Wisła (2) 2010 1 2 282 412 NA 11 NA NA NA +97845 150210180 ANNOPOL Wisła (2) 2010 1 3 272 368 NA 11 NA NA NA +507529 150210180 ANNOPOL Wisła (2) 2010 1 3 272 368 NA 11 NA NA NA ``` ## Example 5 diff --git a/man/hydro_imgw.Rd b/man/hydro_imgw.Rd index 756fab2..085a4d8 100644 --- a/man/hydro_imgw.Rd +++ b/man/hydro_imgw.Rd @@ -15,8 +15,7 @@ hydro_imgw( ) } \arguments{ -\item{interval}{temporal resolution of the data ("daily" , "monthly", -or "semiannual_and_annual")} +\item{interval}{temporal resolution of the data ("daily" or "monthly")} \item{year}{vector of years (e.g., 1966:2000)} diff --git a/man/hydro_imgw_annual.Rd b/man/hydro_imgw_annual.Rd deleted file mode 100644 index 6569b09..0000000 --- a/man/hydro_imgw_annual.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/hydro_imgw_annual.R -\name{hydro_imgw_annual} -\alias{hydro_imgw_annual} -\title{Semi-annual and annual hydrological data} -\usage{ -hydro_imgw_annual( - year, - coords = FALSE, - value = "H", - station = NULL, - col_names = "short", - allow_failure = TRUE, - ... -) -} -\arguments{ -\item{year}{vector of years (e.g., 1966:2000)} - -\item{coords}{add coordinates of the stations (logical value TRUE or FALSE)} - -\item{value}{type of data (can be: state - "H" (default), flow - "Q", or temperature - "T")} - -\item{station}{name or ID of hydrological station(s). -It accepts names (characters in CAPITAL LETTERS) or stations' IDs (numeric)} - -\item{col_names}{three types of column names possible: -"short" - default, values with shorten names, -"full" - full English description, -"polish" - original names in the dataset} - -\item{allow_failure}{logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). 
For debugging purposes change to FALSE} - -\item{...}{other parameters that may be passed to the 'shortening' function that shortens column names} -} -\value{ -data.frame with historical hydrological data for the semi-annual and annual period -} -\description{ -Downloading hydrological data for the semi-annual and annual period -available in the danepubliczne.imgw.pl collection -} -\examples{ -\donttest{ -hydro_yearly = hydro_imgw_annual(year = 2000, value = "H", station = "ANNOPOL") -} -} diff --git a/tests/testthat/test-hydro_imgw.R b/tests/testthat/test-hydro_imgw.R index 223c859..26a4c1a 100644 --- a/tests/testthat/test-hydro_imgw.R +++ b/tests/testthat/test-hydro_imgw.R @@ -25,10 +25,13 @@ test_that("hydro_imgw_not_available", { testthat::expect_true(nrow(h2022_2023) > 50000) } + h2022_2023d = hydro_imgw(interval = "daily", + year = 2022:2023, + coord = TRUE, + allow_failure = FALSE) + if (is.data.frame(h2022_2023d) & nrow(h2022_2023d > 50000)) { + testthat::expect_true(is.data.frame(h2022_2023d)) + testthat::expect_true(nrow(h2022_2023d) > 50000) + } - expect_error(suppressWarnings(hydro_imgw(interval = "semiannual_and_annual", year = 1960, coord = TRUE, - station = "not available", allow_failure = FALSE))) - - expect_error(suppressWarnings(hydro_imgw(interval = "semiannual_and_annual", year = 1960, coord = TRUE, - station = 999, allow_failure = FALSE))) }) \ No newline at end of file diff --git a/tests/testthat/test-hydro_metadata_imgw.R b/tests/testthat/test-hydro_metadata_imgw.R index 8d79f87..887c09a 100644 --- a/tests/testthat/test-hydro_metadata_imgw.R +++ b/tests/testthat/test-hydro_metadata_imgw.R @@ -2,15 +2,11 @@ context("hydro-metadata") h_d <- suppressWarnings(hydro_metadata_imgw("daily")) h_m <- suppressWarnings(hydro_metadata_imgw("monthly")) -h_a <- suppressWarnings(hydro_metadata_imgw("semiannual_and_annual")) test_that("hydro-metadata works!", { if (is.list(h_d) && is.list(h_m) && is.list(h_a)) { expect_equal(dim(h_d[[1]]), c(10, 1)) expect_equal(dim(h_d[[2]]), c(10, 1)) expect_equal(dim(h_m[[1]]), c(10, 1)) - expect_equal(dim(h_a[[1]]), c(16, 1)) - expect_equal(dim(h_a[[2]]), c(16, 1)) - expect_equal(dim(h_a[[3]]), c(16, 1)) } }) diff --git a/vignettes/getstarted.Rmd b/vignettes/getstarted.Rmd index 20e4e3d..5c638f7 100644 --- a/vignettes/getstarted.Rmd +++ b/vignettes/getstarted.Rmd @@ -47,7 +47,7 @@ It is a wrapper for `meteo_monthly()`, `meteo_daily()`, and `meteo_hourly()` - **hydro_imgw()** - Downloading hourly, daily, and monthly hydrological data from the SYNOP / CLIMATE / PRECIP stations available in the danepubliczne.imgw.pl collection. 
-It is a wrapper for `hydro_annual()`, `hydro_monthly()`, and `hydro_daily()` +It is a wrapper for `hydro_monthly()`, and `hydro_daily()` ### Auxiliary functions and datasets From 6f200cea21e678637e84bd32bd4d9229cae07a32 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Mon, 21 Oct 2024 14:26:16 +0200 Subject: [PATCH 06/19] fix: hydro test --- tests/testthat/test-hydro_metadata_imgw.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-hydro_metadata_imgw.R b/tests/testthat/test-hydro_metadata_imgw.R index 887c09a..c344052 100644 --- a/tests/testthat/test-hydro_metadata_imgw.R +++ b/tests/testthat/test-hydro_metadata_imgw.R @@ -4,7 +4,7 @@ h_d <- suppressWarnings(hydro_metadata_imgw("daily")) h_m <- suppressWarnings(hydro_metadata_imgw("monthly")) test_that("hydro-metadata works!", { - if (is.list(h_d) && is.list(h_m) && is.list(h_a)) { + if (is.list(h_d) && is.list(h_m)) { expect_equal(dim(h_d[[1]]), c(10, 1)) expect_equal(dim(h_d[[2]]), c(10, 1)) expect_equal(dim(h_m[[1]]), c(10, 1)) From 549de3317439405fb34447d3b608537d7aaf19a7 Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Sat, 26 Oct 2024 10:55:54 +0200 Subject: [PATCH 07/19] fix: meteo_imgw --- R/meteo_imgw_daily.R | 13 ++++++++----- R/meteo_imgw_monthly.R | 13 +++++++------ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index b3ee431..154db91 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -121,12 +121,15 @@ meteo_imgw_daily_bp = function(rank, colnames(data1) = meta[[1]]$parameters file2 = paste(temp2, dir(temp2), sep = "/")[2] - if (translit) { - data2 = data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)) - } else { - data2 = suppressWarnings(read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")) + if (file.exists(file2)) { + if (translit) { + data2 = data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)) + } else { + data2 = suppressWarnings(read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")) + } + colnames(data2) = meta[[2]]$parameters } - colnames(data2) = meta[[2]]$parameters + unlink(c(temp, temp2)) # remove statuses if not needed: diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index 85bb651..fc86c47 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -134,14 +134,15 @@ meteo_imgw_monthly_bp = function(rank, if (rank != "precip") { # w opadowkach jest tylko jeden plik file2 = paste(temp2, dir(temp2), sep = "/")[2] - - if (translit) { - data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2))) - } else { - data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } + if (file.exists(file2)) { + if (translit) { + data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2))) + } else { + data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") + } colnames(data2) = meta[[2]]$parameters + } } # removing status if set From 031aa3038b08f14eef4907f01c5dc97c54102361 Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 22:05:00 +0100 Subject: [PATCH 08/19] fix: simplify read and fix IMGW tests --- .gitignore | 3 +- NEWS.md | 2 +- R/hydro_imgw_daily.R | 36 ++------------- R/hydro_imgw_monthly.R | 19 +------- R/imgw_read.R | 31 +++++++++++++ R/meteo_imgw_daily.R | 34 +++----------- R/meteo_imgw_hourly.R | 24 +++------- 
R/meteo_imgw_monthly.R | 16 ++----- tests/testthat/test-hydro_imgw.R | 16 ++++--- tests/testthat/test-meteo_imgw_datastore.R | 9 ++-- tests/testthat/test-meteo_metadata_imgw.R | 46 ++++++++++--------- .../test-stations_meteo_imgw_telemetry.R | 6 ++- 12 files changed, 98 insertions(+), 144 deletions(-) create mode 100644 R/imgw_read.R diff --git a/.gitignore b/.gitignore index bdb9742..74fac25 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ covr_report.html lib docs pkgdown -.Renviron \ No newline at end of file +.Renviron +test-out.txt diff --git a/NEWS.md b/NEWS.md index 968ab7d..2720a3d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * adjusting code to recognize different encoding and directory structure * adjusting changes in metadata * removed option to download data for "semiannual and annual" time resolutions due to inconsistencies in the data -* Fix unit tests for ogimet-related datasets +* Fix unit tests for ogimet- and IMGW-related datasets # climate 1.2.1 diff --git a/R/hydro_imgw_daily.R b/R/hydro_imgw_daily.R index cdf8203..8dae9d7 100644 --- a/R/hydro_imgw_daily.R +++ b/R/hydro_imgw_daily.R @@ -25,7 +25,7 @@ hydro_imgw_daily = function(year, coords = FALSE, station = NULL, - col_names= "short", + col_names = "short", allow_failure = TRUE, ...) { @@ -103,22 +103,7 @@ hydro_imgw_daily_bp = function(year, unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, dir(temp2), sep = "/")[1] - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) - } else { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", - fileEncoding = "CP1250"), - warning = function(w) { - read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - if (ncol(data1) == 1) { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";", - fileEncoding = "UTF-8"), - warning = function(w) { - read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - } - } + data1 = imgw_read(translit, file1) # extra exception for a current year according to information provided by IMGW-PIB:, i.e.: # "Do czasu zakonczenia kontroli przeplywow rekordy z danymi z roku 2020 maja format: # Kod stacji #Nazwa stacji #Nazwa rzeki/jeziora #Rok hydrologiczny #Wskaznik miesiaca w roku hydrologicznym @@ -141,22 +126,7 @@ hydro_imgw_daily_bp = function(year, test_url(address, temp) unzip(zipfile = temp, exdir = temp2) file2 = paste(temp2, dir(temp2), sep = "/")[1] - if (translit) { - data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file2))) - } else { - data2 = tryCatch(expr = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ",", - fileEncoding = "CP1250"), - warning = function(w) { - read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - if (ncol(data2) == 1) { - data2 = tryCatch(expr = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";", - fileEncoding = "UTF-8"), - warning = function(w) { - read.csv(file2, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - } - } + data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]][, 1] zjaw_data = rbind(zjaw_data, data2) } diff --git a/R/hydro_imgw_monthly.R b/R/hydro_imgw_monthly.R index 7b87a2e..1e794ab 100644 --- a/R/hydro_imgw_monthly.R +++ b/R/hydro_imgw_monthly.R @@ -85,24 +85,7 @@ hydro_imgw_monthly_bp = function(year, test_url(adres, temp) unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, 
dir(temp2), sep = "/")[1] - - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", file1))) - } else { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ",", - fileEncoding = "CP1250"), - warning = function(w) { - read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - if (ncol(data1) == 1) { - data1 = tryCatch(expr = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";", - fileEncoding = "UTF-8"), - warning = function(w) { - read.csv(file1, header = FALSE, stringsAsFactors = FALSE, sep = ";") - }) - } - } - + data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]][, 1] all_data[[i]] = data1 } diff --git a/R/imgw_read.R b/R/imgw_read.R new file mode 100644 index 0000000..e2a8e21 --- /dev/null +++ b/R/imgw_read.R @@ -0,0 +1,31 @@ +#' Read IMGW hydrological and meteorological raw files that can be saved in different formats +#' +#' Internal function for reading IMGW files +#' @param translit logical whether translit detected and iconv needed for reading +#' @param fpath path to unzipped CSV-alike file +#' +#' @keywords internal +#' @noRd + +imgw_read = function(translit, fpath) { + + if (translit) { + data = as.data.frame(data.table::fread(cmd = paste("iconv -f ISO-8859-2 -t ASCII//TRANSLIT", fpath))) + } else { + data = tryCatch(expr = read.csv(fpath, header = FALSE, stringsAsFactors = FALSE, sep = ",", + fileEncoding = "CP1250"), + warning = function(w) { + read.csv(fpath, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + + if (ncol(data) == 1) { + data = tryCatch(expr = read.csv(fpath, header = FALSE, stringsAsFactors = FALSE, sep = ";", + fileEncoding = "UTF-8"), + warning = function(w) { + read.csv(fpath, header = FALSE, stringsAsFactors = FALSE, sep = ";") + }) + } + + } + return(data) +} diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index 154db91..a2711ca 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -113,20 +113,12 @@ meteo_imgw_daily_bp = function(rank, test_url(addresses_to_download[j], temp) unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, dir(temp2), sep = "/")[1] - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } + data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { - if (translit) { - data2 = data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)) - } else { - data2 = suppressWarnings(read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")) - } + data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters } @@ -174,19 +166,11 @@ meteo_imgw_daily_bp = function(rank, test_url(addresses_to_download[j], temp) unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, dir(temp2), sep = "/")[1] - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } + data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters file2 = paste(temp2, dir(temp2), sep = "/")[2] - if (translit) { - data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2))) - } else { - data2 = 
read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } + data2 = imgw_read(translit, file2) colnames(data2) = meta[[2]]$parameters # usuwa statusy @@ -222,15 +206,9 @@ meteo_imgw_daily_bp = function(rank, temp2 = tempfile() test_url(addresses_to_download[j], temp) unzip(zipfile = temp, exdir = temp2) - file1 = paste(temp2, dir(temp2), sep = "/")[1] - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } - + data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters - # usuwa statusy + # remove status if (status == FALSE) { data1[grep("^Status", colnames(data1))] = NULL } diff --git a/R/meteo_imgw_hourly.R b/R/meteo_imgw_hourly.R index 380a563..20c056b 100644 --- a/R/meteo_imgw_hourly.R +++ b/R/meteo_imgw_hourly.R @@ -113,12 +113,7 @@ meteo_imgw_hourly_bp = function(rank, test_url(addresses_to_download[j], temp) unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, dir(temp2), sep = "/") - - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = suppressWarnings(read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")) - } + data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters @@ -153,24 +148,19 @@ meteo_imgw_hourly_bp = function(rank, test_url(addresses_to_download[j], temp) unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, dir(temp2), sep = "/") - - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } - + data1 = imgw_read(translit, file1) + colnames(data1) = meta[[1]]$parameters - # usuwa statusy + # remove status if (status == FALSE) { data1[grep("^Status", colnames(data1))] = NULL } unlink(c(temp, temp2)) all_data[[length(all_data) + 1]] = data1 - } # koniec petli po zipach do pobrania - } # koniec if'a dla klimatu - } # koniec petli po glownych catalogach danych dobowych + } # end of looping for zip files + } # end of if statement for climate + } # end of loop over directories all_data = do.call(rbind, all_data) diff --git a/R/meteo_imgw_monthly.R b/R/meteo_imgw_monthly.R index fc86c47..afae4d7 100644 --- a/R/meteo_imgw_monthly.R +++ b/R/meteo_imgw_monthly.R @@ -123,25 +123,15 @@ meteo_imgw_monthly_bp = function(rank, #download.file(address, temp) unzip(zipfile = temp, exdir = temp2) file1 = paste(temp2, dir(temp2), sep = "/")[1] - - if (translit) { - data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1))) - } else { - data1 = suppressWarnings(read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")) - } + data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters if (rank != "precip") { # w opadowkach jest tylko jeden plik file2 = paste(temp2, dir(temp2), sep = "/")[2] if (file.exists(file2)) { - if (translit) { - data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2))) - } else { - data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250") - } - - colnames(data2) = meta[[2]]$parameters + data2 = imgw_read(translit, file2) + colnames(data2) = meta[[2]]$parameters } } diff --git 
a/tests/testthat/test-hydro_imgw.R b/tests/testthat/test-hydro_imgw.R index 26a4c1a..6a3131f 100644 --- a/tests/testthat/test-hydro_imgw.R +++ b/tests/testthat/test-hydro_imgw.R @@ -20,18 +20,22 @@ test_that("hydro_imgw_not_available", { coord = TRUE, allow_failure = FALSE) - if (is.data.frame(h2022_2023) & nrow(h2022_2023 > 50000)) { - testthat::expect_true(is.data.frame(h2022_2023)) - testthat::expect_true(nrow(h2022_2023) > 50000) + if (!is.null(h2022_2023)) { + if (is.data.frame(h2022_2023) & nrow(h2022_2023 > 50000)) { + testthat::expect_true(is.data.frame(h2022_2023)) + testthat::expect_true(nrow(h2022_2023) > 50000) + } } h2022_2023d = hydro_imgw(interval = "daily", year = 2022:2023, coord = TRUE, allow_failure = FALSE) - if (is.data.frame(h2022_2023d) & nrow(h2022_2023d > 50000)) { - testthat::expect_true(is.data.frame(h2022_2023d)) - testthat::expect_true(nrow(h2022_2023d) > 50000) + if (!is.null(h2022_2023d)) { + if (is.data.frame(h2022_2023d) & nrow(h2022_2023d > 50000)) { + testthat::expect_true(is.data.frame(h2022_2023d)) + testthat::expect_true(nrow(h2022_2023d) > 50000) + } } }) \ No newline at end of file diff --git a/tests/testthat/test-meteo_imgw_datastore.R b/tests/testthat/test-meteo_imgw_datastore.R index 8b567da..7b9c504 100644 --- a/tests/testthat/test-meteo_imgw_datastore.R +++ b/tests/testthat/test-meteo_imgw_datastore.R @@ -10,10 +10,11 @@ test_that("test-meteo_imgw_datastore", { imgw_telemetry = meteo_imgw_datastore(year = 2023, parameters = "t2m", stations = "PSZENNO") - - if (is.data.frame(imgw_telemetry) & nrow(imgw_telemetry) > 0) { - testthat::expect_true(is.data.frame(imgw_telemetry)) - testthat::expect_true(nrow(imgw_telemetry) > 50000) + if (!is.null(imgw_telemetry)) { + if (is.data.frame(imgw_telemetry) & nrow(imgw_telemetry) > 0) { + testthat::expect_true(is.data.frame(imgw_telemetry)) + testthat::expect_true(nrow(imgw_telemetry) > 50000) + } } } }) diff --git a/tests/testthat/test-meteo_metadata_imgw.R b/tests/testthat/test-meteo_metadata_imgw.R index 9e99236..f450260 100644 --- a/tests/testthat/test-meteo_metadata_imgw.R +++ b/tests/testthat/test-meteo_metadata_imgw.R @@ -1,32 +1,36 @@ context("meteo-metadata") -test_that("tests to be re-written meteo_metadata_imgw", { +test_that("meteo_metadata_imgw tests", { #skip("meteo-metadata skipping") if (!curl::has_internet()) { message("No internet connection! 
\n") return(invisible(NULL)) } else { - m_hs <- meteo_metadata_imgw("hourly", "synop") - m_hc <- meteo_metadata_imgw("hourly", "climate") - m_ds <- meteo_metadata_imgw("daily", "synop") - m_dc <- meteo_metadata_imgw("daily", "climate") - m_dp <- meteo_metadata_imgw("daily", "precip") - m_ms <- meteo_metadata_imgw("monthly", "synop") - m_mc <- meteo_metadata_imgw("monthly", "climate") - m_mp <- meteo_metadata_imgw("monthly", "precip") + m_hs = meteo_metadata_imgw("hourly", "synop") + m_hc = meteo_metadata_imgw("hourly", "climate") + m_ds = meteo_metadata_imgw("daily", "synop") + m_dc = meteo_metadata_imgw("daily", "climate") + m_dp = meteo_metadata_imgw("daily", "precip") + m_ms = meteo_metadata_imgw("monthly", "synop") + m_mc = meteo_metadata_imgw("monthly", "climate") + m_mp = meteo_metadata_imgw("monthly", "precip") expect_error(meteo_metadata_imgw("hourly", "precip")) - expect_equal(dim(m_hs[[1]]), c(107, 3)) - expect_equal(dim(m_hc[[1]]), c(22, 3)) - expect_equal(dim(m_ds[[1]]), c(65, 3)) - expect_equal(dim(m_ds[[2]]), c(23, 3)) - expect_equal(dim(m_dc[[1]]), c(18, 3)) - expect_equal(dim(m_dc[[2]]), c(13, 3)) - expect_equal(dim(m_dp[[1]]), c(16, 3)) - expect_equal(dim(m_ms[[1]]), c(60, 3)) - expect_equal(dim(m_ms[[2]]), c(22, 3)) - expect_equal(dim(m_mc[[1]]), c(27, 3)) - expect_equal(dim(m_mc[[2]]), c(12, 3)) - expect_equal(dim(m_mp[[1]]), c(14, 3)) + + if (is.list(m_hs) && is.list(m_ds) && is.list(m_ds) && is.list(m_dc) && + is.list(m_dp) && is.list(m_ms) && is.list(m_mc) && is.list(m_mp)) { + expect_equal(dim(m_hs[[1]]), c(107, 3)) + expect_equal(dim(m_hc[[1]]), c(22, 3)) + expect_equal(dim(m_ds[[1]]), c(65, 3)) + expect_equal(dim(m_ds[[2]]), c(23, 3)) + expect_equal(dim(m_dc[[1]]), c(18, 3)) + expect_equal(dim(m_dc[[2]]), c(13, 3)) + expect_equal(dim(m_dp[[1]]), c(16, 3)) + expect_equal(dim(m_ms[[1]]), c(60, 3)) + expect_equal(dim(m_ms[[2]]), c(22, 3)) + expect_equal(dim(m_mc[[1]]), c(27, 3)) + expect_equal(dim(m_mc[[2]]), c(12, 3)) + expect_equal(dim(m_mp[[1]]), c(14, 3)) + } } }) diff --git a/tests/testthat/test-stations_meteo_imgw_telemetry.R b/tests/testthat/test-stations_meteo_imgw_telemetry.R index f733ebb..36d11c6 100644 --- a/tests/testthat/test-stations_meteo_imgw_telemetry.R +++ b/tests/testthat/test-stations_meteo_imgw_telemetry.R @@ -8,8 +8,10 @@ test_that("test-stations_meteo_imgw_telemetry", { } else { df = stations_meteo_imgw_telemetry() - if (is.data.frame(df) & nrow(df) > 0) { - testthat::expect_true(is.data.frame(df)) + if (!is.null(df)) { + if (is.data.frame(df) & nrow(df) > 0) { + testthat::expect_true(is.data.frame(df)) + } } } From a9fdc010335007257a481752fab5b27a362b2197 Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 22:13:21 +0100 Subject: [PATCH 09/19] Update README.md --- README.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e323b52..b6ec5e1 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,13 @@ [![R-CMD-check](https://github.com/bczernecki/climate/workflows/R-CMD-check/badge.svg)](https://github.com/bczernecki/climate/actions) [![HTML5 check](https://github.com/bczernecki/climate/actions/workflows/html5-check.yaml/badge.svg?branch=master)](https://github.com/bczernecki/climate/actions/workflows/html5-check.yaml) [![Codecov test -coverage](https://codecov.io/gh/bczernecki/climate/branch/dev/graph/badge.svg)](https://app.codecov.io/gh/bczernecki/climate?branch=master) 
+coverage](https://codecov.io/gh/bczernecki/climate/branch/master/graph/badge.svg)](https://app.codecov.io/gh/bczernecki/climate?branch=master) [![CRAN status](https://www.r-pkg.org/badges/version/climate)](https://cran.r-project.org/package=climate) [![CRAN RStudio mirror -downloads](http://cranlogs.r-pkg.org/badges/climate)](https://cran.r-project.org/package=climate) -[![](http://cranlogs.r-pkg.org/badges/grand-total/climate?color=brightgreen)](https://cran.r-project.org/package=climate) +downloads](https://cranlogs.r-pkg.org/badges/climate)](https://cran.r-project.org/package=climate) +[![](https://cranlogs.r-pkg.org/badges/grand-total/climate?color=brightgreen)](https://cran.r-project.org/package=climate) The goal of the **climate** R package is to automatize downloading of *in-situ* meteorological @@ -46,7 +46,7 @@ install_github("bczernecki/climate") Any meteorological (aka SYNOP) station working under the World Meteorological Organizaton framework after year 2000 should be accessible. - **meteo_imgw()** - Downloading hourly, daily, and monthly meteorological data from the SYNOP/CLIMATE/PRECIP stations available in the danepubliczne.imgw.pl collection. -It is a wrapper for `meteo_monthly()`, `meteo_daily()`, and `meteo_hourly()` +It is a wrapper for `meteo_monthly()`, `meteo_daily()`, and `meteo_hourly()`. If 10-min dataset is needed then consider using **`meteo_imgw_datastore()`** - **meteo_noaa_hourly()** - Downloading hourly NOAA Integrated Surface Hourly (ISH) meteorological data - Some stations have > 100 years long history of observations @@ -68,10 +68,8 @@ country in the Ogimet repository - **nearest_stations_ogimet()** - Downloading information about nearest stations to the selected point using Ogimet repository - **nearest_stations_noaa()** - Downloading information about nearest stations to the selected point available for the selected country in the NOAA ISH meteorological repository - **nearest_stations_imgw()** - List of nearby meteorological or hydrological IMGW-PIB stations in Poland -- **imgw_meteo_stations** - Built-in metadata from the IMGW-PIB repository for meteorological stations, their geographical -coordinates, and ID numbers -- **imgw_hydro_stations** - Built-in metadata from the IMGW-PIB repository for hydrological stations, their geographical -coordinates, and ID numbers +- **imgw_meteo_stations** - Built-in metadata from the IMGW-PIB repository for meteorological stations, their geographical coordinates, and ID numbers +- **imgw_hydro_stations** - Built-in metadata from the IMGW-PIB repository for hydrological stations, their geographical coordinates, and ID numbers - **imgw_meteo_abbrev** - Dictionary explaining variables available for meteorological stations (from the IMGW-PIB repository) - **imgw_hydro_abbrev** - Dictionary explaining variables available for hydrological stations (from the IMGW-PIB repository) From ecdca8a5ee48a103fe44eb4abf158d4e84572986 Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 22:23:05 +0100 Subject: [PATCH 10/19] climate ver. 
1.2.1 --- R/meteo_imgw_daily.R | 1 + tests/testthat/test-meteo_imgw_daily.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R/meteo_imgw_daily.R b/R/meteo_imgw_daily.R index a2711ca..44adee5 100644 --- a/R/meteo_imgw_daily.R +++ b/R/meteo_imgw_daily.R @@ -206,6 +206,7 @@ meteo_imgw_daily_bp = function(rank, temp2 = tempfile() test_url(addresses_to_download[j], temp) unzip(zipfile = temp, exdir = temp2) + file1 = paste(temp2, dir(temp2), sep = "/")[1] data1 = imgw_read(translit, file1) colnames(data1) = meta[[1]]$parameters # remove status diff --git a/tests/testthat/test-meteo_imgw_daily.R b/tests/testthat/test-meteo_imgw_daily.R index 416b06c..108fa7a 100644 --- a/tests/testthat/test-meteo_imgw_daily.R +++ b/tests/testthat/test-meteo_imgw_daily.R @@ -22,7 +22,7 @@ test_that("check_column_with_coordinates", { station_with_coordinates = meteo_imgw_daily(rank = "precip", year = 2002, coords = TRUE, - station = "IMBRAMOWICE") + station = "IMBRAMOWICE", allow_failure = FALSE) if (is.data.frame(station_with_coordinates)) { expect_true(any(colnames(station_with_coordinates) %in% c("X", "Y"))) } From 66ff843849ce94dd430e33ac8ec850a07235fc5f Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 22:33:52 +0100 Subject: [PATCH 11/19] feat: covr ignore --- .covrignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .covrignore diff --git a/.covrignore b/.covrignore new file mode 100644 index 0000000..9e0f7a3 --- /dev/null +++ b/.covrignore @@ -0,0 +1,2 @@ +R/sounding_wyoming.R +R/imgw_read.R \ No newline at end of file From 25b554ba80e449cf32da4d985cfc939e1ba04d97 Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 22:46:35 +0100 Subject: [PATCH 12/19] adding .covrignore --- .Rbuildignore | 2 +- .covrignore | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 0738ebe..6037d25 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,4 +16,4 @@ vignettes/articles/usecase.Rmd ^pkgdown$ ^.codecov.yml$ ^tests$ - \ No newline at end of file +^.covrignore$ diff --git a/.covrignore b/.covrignore index 9e0f7a3..cb5a7eb 100644 --- a/.covrignore +++ b/.covrignore @@ -1,2 +1,3 @@ R/sounding_wyoming.R -R/imgw_read.R \ No newline at end of file +R/imgw_read.R +R/onAttach.R \ No newline at end of file From 0030daaf69046de237a983d5ba9072090dd161a5 Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 22:47:01 +0100 Subject: [PATCH 13/19] adding .covrignore --- .covrignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.covrignore b/.covrignore index cb5a7eb..2ea8cf1 100644 --- a/.covrignore +++ b/.covrignore @@ -1,3 +1,3 @@ R/sounding_wyoming.R R/imgw_read.R -R/onAttach.R \ No newline at end of file +R/onAttach.R From 6459f5d8126342ced8609d08b5c7cda5b2309d6d Mon Sep 17 00:00:00 2001 From: Bartosz Czernecki Date: Mon, 28 Oct 2024 23:52:17 +0100 Subject: [PATCH 14/19] feat: add calendar Date for hydro data --- NEWS.md | 1 + R/hydro_imgw_daily.R | 9 +++++++++ R/hydro_imgw_monthly.R | 9 +++++++++ R/hydro_shortening_imgw.R | 3 ++- data-raw/hydro_parametry_skroty.csv | 1 + data/imgw_hydro_abbrev.rda | Bin 994 -> 1010 bytes tests/testthat/test-hydro_imgw.R | 2 ++ 7 files changed, 24 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2720a3d..18486f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * adjusting changes in metadata * removed option to download data for "semiannual and annual" time resolutions due to inconsistencies in the data * Fix unit 
tests for ogimet- and IMGW-related datasets +* Resolving date formatting for hydrological data - the Date column represents calendar date # climate 1.2.1 diff --git a/R/hydro_imgw_daily.R b/R/hydro_imgw_daily.R index 8dae9d7..39d3595 100644 --- a/R/hydro_imgw_daily.R +++ b/R/hydro_imgw_daily.R @@ -170,6 +170,15 @@ hydro_imgw_daily_bp = function(year, } all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydro|w roku hydro|Dzie")]), ] + # fix dates and add as seperate column: + yy_ind = grep(x = colnames(all_data), "Rok hydrologiczny") + mm_ind = grep(x = colnames(all_data), "kalendarzowy") + dd_ind = grep(x = colnames(all_data), "Dzie") + data_df = all_data[, c(yy_ind, mm_ind, dd_ind)] + data_df$yy = ifelse(data_df[, 2] >= 11, data_df[, 1] - 1, data_df[, 1]) + all_data$Data = as.Date(ISOdate(year = data_df$yy, month = data_df[, 2], day = data_df[, 3])) + all_data = all_data[, c(1:3, ncol(all_data), 4:(ncol(all_data) - 1)), ] + all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...) return(all_data) diff --git a/R/hydro_imgw_monthly.R b/R/hydro_imgw_monthly.R index 1e794ab..92c1ba7 100644 --- a/R/hydro_imgw_monthly.R +++ b/R/hydro_imgw_monthly.R @@ -116,6 +116,15 @@ hydro_imgw_monthly_bp = function(year, } } all_data = all_data[do.call(order, all_data[grep(x = colnames(all_data), "Nazwa stacji|Rok hydrologiczny|w roku hydro")]), ] + # fix dates and add as seperate column: + yy_ind = grep(x = colnames(all_data), "Rok hydrologiczny") + mm_ind = grep(x = colnames(all_data), "kalendarzowy") + data_df = all_data[, c(yy_ind, mm_ind)] + data_df$day = 1 + data_df$yy = ifelse(data_df[, 2] >= 11, data_df[, 1] - 1, data_df[, 1]) + all_data$Data = as.Date(ISOdate(year = data_df$yy, month = data_df[, 2], day = data_df$day)) + all_data = all_data[, c(1:3, ncol(all_data), 4:(ncol(all_data) - 1)), ] + all_data = hydro_shortening_imgw(all_data, col_names = col_names, ...) 
return(all_data) diff --git a/R/hydro_shortening_imgw.R b/R/hydro_shortening_imgw.R index e4bfeac..8a9a780 100644 --- a/R/hydro_shortening_imgw.R +++ b/R/hydro_shortening_imgw.R @@ -54,6 +54,7 @@ hydro_shortening_imgw = function(data, if (remove_duplicates == TRUE) { data = data[, !duplicated(colnames(data))] } - + + rownames(data) = NULL return(data) } diff --git a/data-raw/hydro_parametry_skroty.csv b/data-raw/hydro_parametry_skroty.csv index 22b425c..2558240 100644 --- a/data-raw/hydro_parametry_skroty.csv +++ b/data-raw/hydro_parametry_skroty.csv @@ -1,5 +1,6 @@ fullname;abbr_eng;fullname_eng;; Kod stacji;id;ID;; +Data;date;Date;; Dzien;dd;Day;; Dzień;dd;Day;; Miesiąc kalendarzowy;mm;Month;; diff --git a/data/imgw_hydro_abbrev.rda b/data/imgw_hydro_abbrev.rda index 88c894255611345ce2e129a862c0fe441cda1648..75a94a80c3bcbced76cce22a5a232326431ef8f1 100644 GIT binary patch literal 1010 zcmVZ ze~c)y+SBH4ZLjZk*V?MV@5XHnBGILUGk(d~{*2)+aC}`!XR`Oqsw$f%aVsC%*ARr+XK2e!W z$A(w`fn+HGmC=CRAJU9TWnEsMLMG(Y*R4IY9Ob9u6uSIX^M3Tw0NV%B&&Uu`M#Dhn zuRR8MJ7gTwkQWOfJse4*=JrRY`PyET7S^> z_)By8NvGrox>39VPi=DbpQ3=|e+m`=4Wd|4o^S&_xdejMP0Wom!BAv{u2{*OIJV41Hx%e+^)U6p|^w%HxsghnsFsyc-F^*w2zeX`6u|#nT zMf7E(t)yeD>ImD-s4IQfG<#Z#&r9GlYzq_QQJaYbhh^&Ftq-e}_=|$+Z2Z{Q72OLy|JAszzl>`w~{8{8Xls8cV_Q zH*{z#_-YRePDKm}W=QhWF{RT)?*nk@CSn`dX^R)qEN|b#D98TO6R^N?I!9n+_ zJ^t$36TQnQ?6xo~CeZh012`pGdf0$AIYFWMp60hp*}-{f8$VA~=;~dTG7(c{mRXc= zK2h{K(=6+){@n8trKs>I^pxb_pZ8&<0AU4H3^C)j1$XB_Ug+99wNS@9bKopyF&%tb z79-v%LB)7wS|Qp>M^CSyQ3SFFIIYD7F7Ki~8c-o|Q0$}k(><#6f+BrtDP@zT=Qxyl z%$ewyUMXsVa2lEMub9IRdr25-!0P&C!=*?zSI6R$vywwEB$0J!4;OJok&T|xE-#v2 g!>l%J4$J!L{eoX-XJ>!o&p-O}FOFgjYaj^#09lCYU;qFB literal 994 zcmV<810DPyiwFP!000001D#ggZW~1u-qdkRlZO7dDM%I45fUOnPTJH&LR7t~NfUu- zevBxx+SO!tY>#(m#+u#D#(PyoAReNR!5!)y-ACay2xsh>c(XP|>V@s^%y<6woHJ*A z*nH7gda>j?FAcGCDE*8KAZ0WRWd6ouu(k^u z+FC#={ZSnbP#jwGIFj9wxrbrNJ zZrwOppE}{D)|dw<86=mV!baVm8VvvXBR^Bp%Ls0lfUDNDIDLyNa&#)~_kZ)T|Br!1_J}D{L0Wm)AlxS?mywJXUPiQ3nKQt=6 zFErBA(6whOImW?y_dA5rPbTd*<=3|%8<3P?RW&P9+LN#rDi)1ks&pJp5wDc49j6`Zlf4$7vBo^c`U2 zKk_MF>nqH+$e)8aH)vV06IUuXBmp%?*Dy%Tq%h*bfdPvx8YKo-`XhUf7IrOYTSey6 z(>b`x{HYldQ+!M>QbXM5Q_KtagY~O%pLu~1t}@?co_*Nruprz(N}f>V_LNM^1>I}f zXK#|xEUdPqi2FuZIb_-m*7b)%U5_r+b$KV0di6Ej$S$Al79~-Fi$f1oN4J6Eeci1? 
zSF3KoK^>$jY&y-ht?|v>i}e5_N|fF2M`JgI6?d~;aL{>bkH7rxRPQVbdri!W3G}^H z2TqAbkLu7OrwAG!sD7uE9bA;N{>wy!j^0@*6ERh0X+;U=V?nPiZDqaHo_k)R6crwY zu96)5i$1ItD6F80A!gjR;NA?$3tgKvE!6Su3^|QiOb6eJ#fW!HR54!JtPpLrttVE{ zC<569oX}zeSN2dI_NkCKDE1Y5f1fJ7h)9oGld{V4a~w)N+D!CHe;2BPa2lDBub9J+ zx=9#n!pi!OgiF!bTo=oa&q@xxkVMv@JzVlhOg6hpyS%J^9kbfBIjrdG_8Py=&(HtH QpMUh{UxP#Er2Gf~0L#MU)c^nh diff --git a/tests/testthat/test-hydro_imgw.R b/tests/testthat/test-hydro_imgw.R index 6a3131f..9fa844b 100644 --- a/tests/testthat/test-hydro_imgw.R +++ b/tests/testthat/test-hydro_imgw.R @@ -24,6 +24,7 @@ test_that("hydro_imgw_not_available", { if (is.data.frame(h2022_2023) & nrow(h2022_2023 > 50000)) { testthat::expect_true(is.data.frame(h2022_2023)) testthat::expect_true(nrow(h2022_2023) > 50000) + testthat::expect_true(class(h2022_2023$date) == "Date") } } @@ -35,6 +36,7 @@ test_that("hydro_imgw_not_available", { if (is.data.frame(h2022_2023d) & nrow(h2022_2023d > 50000)) { testthat::expect_true(is.data.frame(h2022_2023d)) testthat::expect_true(nrow(h2022_2023d) > 50000) + testthat::expect_true(class(h2022_2023d$date) == "Date") } } From d858cf892892108283ea95f41a0884b6a1bba90e Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 29 Oct 2024 12:23:59 +0100 Subject: [PATCH 15/19] climate 1.2.2 --- .codecov.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.codecov.yml b/.codecov.yml index 5e4701b..9bf04a5 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,4 +1,8 @@ coverage: + ignore: + - "R/sounding_wyoming.R" + - "R/imgw_read.R" + - "R/onAttach.R" status: patch: default: From 0620a7a969ffdbbff2ef7c83bc799a37089dde2f Mon Sep 17 00:00:00 2001 From: bczernecki Date: Tue, 29 Oct 2024 19:19:52 +0100 Subject: [PATCH 16/19] fix: remove duplicates --- R/hydro_shortening_imgw.R | 2 +- R/meteo_shortening_imgw.R | 3 ++- README.md | 14 +++++++------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/R/hydro_shortening_imgw.R b/R/hydro_shortening_imgw.R index 8a9a780..7f58ebb 100644 --- a/R/hydro_shortening_imgw.R +++ b/R/hydro_shortening_imgw.R @@ -54,7 +54,7 @@ hydro_shortening_imgw = function(data, if (remove_duplicates == TRUE) { data = data[, !duplicated(colnames(data))] } - + data = unique(data) rownames(data) = NULL return(data) } diff --git a/R/meteo_shortening_imgw.R b/R/meteo_shortening_imgw.R index 4f0fdbf..9fee6f0 100644 --- a/R/meteo_shortening_imgw.R +++ b/R/meteo_shortening_imgw.R @@ -56,6 +56,7 @@ meteo_shortening_imgw = function(data, col_names = "short", remove_duplicates = colnames(data)[orig_columns %in% abbrev$fullname] = abbrev$fullname_eng[matches] } } - + data = unique(data) + rownames(data) = NULL return(data) } diff --git a/README.md b/README.md index b6ec5e1..4d28f08 100644 --- a/README.md +++ b/README.md @@ -166,13 +166,13 @@ head(m) h = hydro_imgw(interval = "daily", year = 2010:2011) head(h) - id station riv_or_lake hyy idhyy dd H Q T mm thick id_ice p_ice -97843 150210180 ANNOPOL Wisła (2) 2010 1 1 287 436 NA 11 NA NA NA -507527 150210180 ANNOPOL Wisła (2) 2010 1 1 287 436 NA 11 NA NA NA -97844 150210180 ANNOPOL Wisła (2) 2010 1 2 282 412 NA 11 NA NA NA -507528 150210180 ANNOPOL Wisła (2) 2010 1 2 282 412 NA 11 NA NA NA -97845 150210180 ANNOPOL Wisła (2) 2010 1 3 272 368 NA 11 NA NA NA -507529 150210180 ANNOPOL Wisła (2) 2010 1 3 272 368 NA 11 NA NA NA + id station riv_or_lake date hyy idhyy dd H Q T mm thick +1 150210180 ANNOPOL Wisła (2) 2009-11-01 2010 1 1 287 436 NA 11 NA +2 150210180 ANNOPOL Wisła (2) 2009-11-02 2010 1 2 282 412 NA 11 NA +3 150210180 ANNOPOL Wisła (2) 2009-11-03 2010 1 
3 272 368 NA 11 NA +4 150210180 ANNOPOL Wisła (2) 2009-11-04 2010 1 4 268 352 NA 11 NA +5 150210180 ANNOPOL Wisła (2) 2009-11-05 2010 1 5 264 336 NA 11 NA +6 150210180 ANNOPOL Wisła (2) 2009-11-06 2010 1 6 260 320 NA 11 NA ``` ## Example 5 From b5d7005ea85d7390d0544203708cd504d171f385 Mon Sep 17 00:00:00 2001 From: Jakub Nowosad Date: Thu, 31 Oct 2024 12:40:13 +0100 Subject: [PATCH 17/19] cleans readme --- README.md | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 4d28f08..b5f1a77 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ and hydrological data from publicly available repositories: ## Installation -The stable release of the **climate** package from the [CRAN](https://CRAN.R-project.org) reposity can be installed with: +The stable release of the **climate** package from the [CRAN](https://CRAN.R-project.org) repository can be installed with: ``` r install.packages("climate") @@ -73,8 +73,6 @@ country in the Ogimet repository - **imgw_meteo_abbrev** - Dictionary explaining variables available for meteorological stations (from the IMGW-PIB repository) - **imgw_hydro_abbrev** - Dictionary explaining variables available for hydrological stations (from the IMGW-PIB repository) - - ## Example 1 #### Download hourly dataset from NOAA ISH meteorological repository: @@ -90,8 +88,6 @@ head(noaa) # 2019 1 1 3 16.85 52.417 84 5.2 4.6 5 240 1021.2 1900 ``` - - ## Example 2 #### Finding a nearest meteorological stations in a given country using NOAA ISH data source: @@ -118,9 +114,9 @@ nearest_stations_ogimet(country = "United+Kingdom", ![100 nearest stations to given coordinates in UK](http://iqdata.eu/kolokwium/uk.png) - ## Example 3 #### Downloading daily (or hourly) data from a global (OGIMET) repository knowing its ID (see also `nearest_stations_ogimet()`): + ``` r library(climate) o = meteo_ogimet(date = c(Sys.Date() - 5, Sys.Date() - 1), @@ -178,12 +174,11 @@ head(h) ## Example 5 #### Create Walter & Lieth climatic diagram based on downloaded data - ``` r4 library(climate) library(dplyr) -df = meteo_imgw(interval = 'monthly', rank='synop', year = 1991:2019, station = "POZNAŃ") +df = meteo_imgw(interval = "monthly", rank = "synop", year = 1991:2019, station = "POZNAŃ") df2 = select(df, station:t2m_mean_mon, rr_monthly) monthly_summary = df2 %>% @@ -239,7 +234,6 @@ ggplot(co2, aes(date, co2_avg)) + ![CO2 monthly concentration, Mauna Loa observatory](http://iqdata.eu/kolokwium/co2_chart.svg) - ## Example 7 #### Use "climate" inside python environment via rpy2 @@ -251,14 +245,14 @@ import pandas as pd import datetime as dt # load climate package (make sure that it was installed in R before) -importr('climate') +importr("climate") # test functionality e.g. 
with meteo_ogimet function for New York - La Guardia: -df = robjects.r['meteo_ogimet'](interval = "daily", station = 72503, - date = robjects.StrVector(['2022-05-01', '2022-06-15'])) +df = robjects.r["meteo_ogimet"](interval = "daily", station = 72503, + date = robjects.StrVector(["2022-05-01", "2022-06-15"])) # optionally - transform object to pandas data frame and rename columns + fix datetime: res = pd.DataFrame(df).transpose() res.columns = df.colnames -res['Date'] = pd.TimedeltaIndex(res['Date'], unit='d') + dt.datetime(1970,1,1) +res["Date"] = pd.TimedeltaIndex(res["Date"], unit="d") + dt.datetime(1970,1,1) res.head >>> res[res.columns[0:7]].head() @@ -293,4 +287,3 @@ LaTeX/BibTeX version can be obtained with: library(climate) citation("climate") ``` - From d063eff3fa9a1a81f0c389fbc6146c610f8d19a0 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Thu, 31 Oct 2024 13:06:11 +0100 Subject: [PATCH 18/19] fix: datastore as data.frame --- R/meteo_imgw_datastore.R | 5 +++-- man/meteo_imgw_datastore.Rd | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/meteo_imgw_datastore.R b/R/meteo_imgw_datastore.R index b864eab..80a609a 100644 --- a/R/meteo_imgw_datastore.R +++ b/R/meteo_imgw_datastore.R @@ -29,7 +29,8 @@ #' @param allow_failure logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE #' @import data.table #' @export -#' @returns data.frame with a raw meteorological measurements in 10-min intervals +#' @returns data.frame with a raw meteorological measurements in 10-min intervals. +#' Please note that this dataset is not validated by experts and may contain invalid values. #' @examples #' \donttest{ #' imgw_telemetry = meteo_imgw_datastore(year = 2022:2023, @@ -160,5 +161,5 @@ meteo_imgw_datastore_bp = function(year, colnames(all_data)[which(colnames(all_data) %in% c("V1", "V3"))] = c("id", "date_time") - return(all_data) + return(as.data.frame(all_data)) } diff --git a/man/meteo_imgw_datastore.Rd b/man/meteo_imgw_datastore.Rd index 1bc73f8..14112cb 100644 --- a/man/meteo_imgw_datastore.Rd +++ b/man/meteo_imgw_datastore.Rd @@ -44,7 +44,8 @@ Default \code{NULL} means to download data for all available stations. \item{allow_failure}{logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE} } \value{ -data.frame with a raw meteorological measurements in 10-min intervals +data.frame with a raw meteorological measurements in 10-min intervals. +Please note that this dataset is not validated by experts and may contain invalid values. 
} \description{ Downloading hourly (meteorological) data from the telemetric stations From 54154294389fed2edfb9786a64d0cfe096730056 Mon Sep 17 00:00:00 2001 From: bczernecki Date: Fri, 1 Nov 2024 12:56:51 +0100 Subject: [PATCH 19/19] fix: ogimet_hourly logic --- NEWS.md | 1 + R/ogimet_hourly.R | 26 ++++++++++++++------------ man/ogimet_hourly.Rd | 3 ++- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index 18486f8..f42c93d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * removed option to download data for "semiannual and annual" time resolutions due to inconsistencies in the data * Fix unit tests for ogimet- and IMGW-related datasets * Resolving date formatting for hydrological data - the Date column represents calendar date +* Corrected logic in downloading hourly OGIMET dataset # climate 1.2.1 diff --git a/R/ogimet_hourly.R b/R/ogimet_hourly.R index b69c63e..30c8131 100644 --- a/R/ogimet_hourly.R +++ b/R/ogimet_hourly.R @@ -6,7 +6,8 @@ #' @param coords add geographical coordinates of the station (logical value TRUE or FALSE) #' @param station WMO ID of meteorological station(s). Character or numeric vector #' @param precip_split whether to split precipitation fields into 6/12/24h; default: TRUE -#' @param allow_failure logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE +#' @param allow_failure logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). +#' For debugging purposes change to FALSE #' @importFrom XML readHTMLTable #' #' @export @@ -51,9 +52,10 @@ ogimet_hourly_bp = function(date = date, dates = seq.Date(min(as.Date(date)), max(as.Date(date)), by = "1 month") - 1 dates = unique(c(dates, as.Date(max(date)))) + diff_dates = diff(dates) # initalizing empty data frame for storing results: - data_station <- + data_station = data.frame( "Date" = character(), "hour" = character(), @@ -92,7 +94,10 @@ ogimet_hourly_bp = function(date = date, year = format(dates[i], "%Y") month = format(dates[i], "%m") day = format(dates[i], "%d") - ndays = day + ndays = as.numeric(diff_dates[i - 1]) + ndays = ifelse(ndays == 0, 1, ndays) + ndays = sprintf("%02d", ndays) + linkpl2 = paste("https://www.ogimet.com/cgi-bin/gsynres?ind=", station_nr, "&lang=en&decoded=yes&ndays=", @@ -105,12 +110,14 @@ ogimet_hourly_bp = function(date = date, day, "&hora=23", sep = "") - if (month == "01") linkpl2 = paste("http://ogimet.com/cgi-bin/gsynres?ind=", + if (month == "01") { + linkpl2 = paste("http://ogimet.com/cgi-bin/gsynres?ind=", station_nr, "&lang=en&decoded=yes&ndays=31&ano=", year, "&mes=02&day=1&hora=00", sep = "") + } temp = tempfile() test_url(linkpl2, temp) @@ -163,23 +170,17 @@ ogimet_hourly_bp = function(date = date, }# end of looping for stations if (nrow(data_station) > 0) { - - data_station = data_station[!duplicated(data_station), ] - + data_station = data_station[!duplicated(data_station), ] # converting character to proper field representation: - # get rid off "---" standing for missing/blank fields: data_station[which(data_station == "--" | data_station == "---" | data_station == "----" | data_station == "-----", arr.ind = TRUE)] = NA - # changing time.. 
data_station$Date = strptime(paste(data_station$Date, data_station$hour), "%m/%d/%Y %H:%M", tz = 'UTC') data_station$hour = NULL - # other columns to numeric: columns = c("TC", "TdC", "ffkmh", "Gustkmh", "P0hPa", "PseahPa", "PTnd", "Nt", "Nh", "HKm", "InsoD1", "Viskm", "Snowcm", "station_ID") columns = colnames(data_station)[(colnames(data_station) %in% columns)] - suppressWarnings(data_station[, columns] <- as.data.frame(sapply(data_station[,columns], as.numeric))) @@ -216,6 +217,7 @@ ogimet_hourly_bp = function(date = date, } # end of checking whether object is empty + data_station = unique(data_station) + rownames(data_station) = NULL return(data_station) - } diff --git a/man/ogimet_hourly.Rd b/man/ogimet_hourly.Rd index 1da7b40..c3f880d 100644 --- a/man/ogimet_hourly.Rd +++ b/man/ogimet_hourly.Rd @@ -21,7 +21,8 @@ ogimet_hourly( \item{precip_split}{whether to split precipitation fields into 6/12/24h; default: TRUE} -\item{allow_failure}{logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). For debugging purposes change to FALSE} +\item{allow_failure}{logical - whether to proceed or stop on failure. By default set to TRUE (i.e. don't stop on error). +For debugging purposes change to FALSE} } \value{ data.frame with historical meteorological data for hourly time interval