From 2a77981d06766105f48224bc505ed32ad91ca270 Mon Sep 17 00:00:00 2001
From: Carly Narlesky <narlesky@mbkengineers.com>
Date: Wed, 22 Nov 2023 15:58:15 -0800
Subject: [PATCH 01/36] Update requirements for compatibility with Python 3.11

---
 requirements.txt | 9 +++++----
 setup.py         | 8 ++++----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6fd9d33..770bea0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-beautifulsoup4==4.5.3
-lxml==4.4.1
+beautifulsoup4==4.12.2
+lxml==4.9.3
 numpy>=1.21.3
-pandas==1.3.4
+pandas==1.5.3
 pdftotext==2.2.2
 python-dateutil==2.8.2
 python-dotenv==0.19.2
@@ -10,4 +10,5 @@ selenium==3.8.0
 sphinx-readable-theme==1.3.0
 sphinx-rtd-theme-1.0.0
 Sphinx==4.3.0
-scipy>=1.8.0
\ No newline at end of file
+scipy>=1.8.0
+tabula-py==2.4.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 75481ac..04211af 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import setuptools
 
@@ -11,14 +10,15 @@
                      'Programming Language :: Python :: 3',
                      'Programming Language :: Python :: 3.6',
                      'Programming Language :: Python :: 3.8',
+                     'Programming Language :: Python :: 3.11',
                      'Operating System :: OS Independent',
                  ),
-                 description='Contains various web-scraping utilities used in Hydro/ResOps group at MBK Engineers',
+                 description='Contains various web-scraping utilities used at MBK Engineers',
                  url='https://github.com/MBKEngineers/collect.git',
                  packages=setuptools.find_packages(),
                  setup_requires=['numpy>=1.21.3'],
-                 install_requires=['beautifulsoup4==4.5.3',
-                                   'lxml>=4.4.1',
+                 install_requires=['beautifulsoup4==4.12.2',
+                                   'lxml==4.9.3',
                                    'pandas==1.5.3',
                                    'python-dateutil==2.8.2',
                                    'python-dotenv==0.19.2',

From 0b5ea9f5ecfdb171d48e15fb74ead54fea34930 Mon Sep 17 00:00:00 2001
From: Carly Narlesky <narlesky@mbkengineers.com>
Date: Wed, 22 Nov 2023 16:00:32 -0800
Subject: [PATCH 02/36] Update how user is prompted to install pdftotext

---
 collect/dwr/swp.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/collect/dwr/swp.py b/collect/dwr/swp.py
index d186980..43c9e6c 100644
--- a/collect/dwr/swp.py
+++ b/collect/dwr/swp.py
@@ -9,13 +9,17 @@
 import re
 
 import pandas as pd
-try:
-    import pdftotext
-except:
-    print('Module pdftotext is required for SWP report collection. Install with `pip install pdftotext==2.2.2`')
 import requests
 
 
+def prompt_installation_and_exit():
+    try:
+        import pdftotext
+    except:
+        print('Module pdftotext is required for SWP report collection. Install with `pip install pdftotext==2.2.2`')
+        exit()
+
+
 def get_report_catalog(console=True):
     """
     prints list of available SWP report names to console
@@ -108,6 +112,7 @@ def get_raw_text(report, filename=None, preserve_white_space=True):
         f.seek(0)
 
         # parse PDF and extract as string
+        prompt_installation_and_exit()
         content = pdftotext.PDF(f, raw=False, physical=True)[0]
 
         # optionally export the raw report as text
@@ -281,6 +286,7 @@ def get_oco_tabular_data(report):
         f.seek(0)
 
         # parse PDF and extract as string
+        prompt_installation_and_exit()
         content = list(pdftotext.PDF(f, raw=False, physical=True))
 
         # report information

From 5a51fb5fd70608c024b27a512c3d2dbe68426e1a Mon Sep 17 00:00:00 2001
From: Carly Narlesky <narlesky@mbkengineers.com>
Date: Mon, 27 Nov 2023 10:43:25 -0800
Subject: [PATCH 03/36] Update setup.py requirements definition

---
 requirements.txt | 14 --------------
 setup.py         |  5 +++--
 2 files changed, 3 insertions(+), 16 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 770bea0..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-beautifulsoup4==4.12.2
-lxml==4.9.3
-numpy>=1.21.3
-pandas==1.5.3
-pdftotext==2.2.2
-python-dateutil==2.8.2
-python-dotenv==0.19.2
-requests>=2.26.0
-selenium==3.8.0
-sphinx-readable-theme==1.3.0
-sphinx-rtd-theme-1.0.0
-Sphinx==4.3.0
-scipy>=1.8.0
-tabula-py==2.4.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 04211af..a186bf3 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 
 
 setuptools.setup(name='collect',
-                 version='0.0.1',
+                 version='0.0.2',
                  author='MBK Engineers',
                  author_email='narlesky@mbkengineers.com',
                  classifiers=(
@@ -18,9 +18,10 @@
                  packages=setuptools.find_packages(),
                  setup_requires=['numpy>=1.21.3'],
                  install_requires=['beautifulsoup4==4.12.2',
+                                   'html5lib==1.1',
                                    'lxml==4.9.3',
                                    'pandas==1.5.3',
-                                   'python-dateutil==2.8.2',
+                                   'python-dateutil>=2.8.2',
                                    'python-dotenv==0.19.2',

From 8bb965ffe74b8ff50c9a389c3b6e2e13f1748379 Mon Sep 17 00:00:00 2001
From: Carly Narlesky <narlesky@mbkengineers.com>
Date: Mon, 27 Nov 2023 11:28:37 -0800
Subject: [PATCH 04/36] Add initial tests for alert, usace.wcds, cnrfc modules
 to test python 3.11 conversion; refactor to fix io import and inject
 PyOpenSSL for python 3.11 compatibility

---
 collect/tests/__init__.py                    |   0
 collect/tests/test_basics.py                 | 305 ++++++++++++++++
 collect/usace/wcds.py                        | 111 +++---
 tests/2019040412_N_SanJoaquin_csv_export.csv | 363 -------------------
 tests/test_cnrfc.py                          |  83 -----
 5 files changed, 368 insertions(+), 494 deletions(-)
 create mode 100644 collect/tests/__init__.py
 create mode 100644 collect/tests/test_basics.py
 delete mode 100755 tests/2019040412_N_SanJoaquin_csv_export.csv
 delete mode 100644 tests/test_cnrfc.py

diff --git a/collect/tests/__init__.py b/collect/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/collect/tests/test_basics.py b/collect/tests/test_basics.py
new file mode 100644
index 0000000..dce2cbb
--- /dev/null
+++ b/collect/tests/test_basics.py
@@ -0,0 +1,305 @@
+"""
+collect.tests.test_basics
+============================================================
+initial test suite for collect data access and utility functions; note: these tests require internet connection
+"""
+# -*- coding: utf-8 -*-
+import datetime as dt
+import io
+import os
+import textwrap
+import unittest
+import unittest.mock
+
+from dotenv import load_dotenv
+import pandas as pd
+
+from collect.dwr import cdec
+from collect.dwr import casgem
+from collect.dwr import cawdl
+from
collect.dwr import b120 +from collect.dwr import swp + +from collect import alert +from collect import cnrfc +from collect import cvo +from collect import nid +from collect import usgs +from collect.usace import wcds + + +class TestSacAlert(unittest.TestCase): + + def test_alert_get_site_notes(self): + """ + test the function for retrieving site metadata produces the expected entries + """ + result = alert.get_site_notes('1137') + self.assertEqual(result['site_id'], '1137') + self.assertEqual(result['Facility ID:'], 'A31') + self.assertEqual(result['Location:'], 'Upstream of Alpine Frost Dr. west of Bruceville Rd.') + self.assertEqual(result['Date Installed:'], '2/6/1994') + + def test_alert_get_data(self): + result = alert.get_data('1137', dt.datetime(2021, 3, 18, 14), dt.datetime(2021, 3, 18, 20), device_ids=[4]) + + # check the queried sensor values for the specified date range + self.assertEqual(result['data']['Value'].tolist(), + [0.0, 0.04, 0.0, 0.04, 0.04, 0.0, 0.0, 0.04, 0.0, 0.04, 0.04, 0.04, 0.0, 0.04, 0.0]) + + # check the associated date/time stamps + self.assertEqual(result['data']['Receive'].tolist()[:4], + ['2021-03-18 14:00:25', '2021-03-18 14:36:20', '2021-03-18 15:00:30', '2021-03-18 15:24:21']) + + def test_alert_get_site_sensors(self): + """ + test the function for retrieving site metadata sensors list produces the expected number of entries + """ + self.assertEqual(len(alert.get_site_sensors(1122)['sensors']), 7) + + def test_alert_get_sites(self): + """ + test the function for retrieving site list for a particular gage types returns the expected number of entries + """ + self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (81, 12)) + + +class TestCNRFC(unittest.TestCase): + + @property + def deterministic_frame(self): + """ + fixture for testing watershed deterministic file handling + """ + if not hasattr(self, '_deterministic_frame'): + text_data = io.StringIO(textwrap.dedent("""\ + GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 + ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE + 2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 + 2019-03-30 13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 + 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 + 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 + 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 + 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 + 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 + 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 + 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 + 2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 + 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 + 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 + 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 + 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 + 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 + 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 + 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 + 2019-03-31 
05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 + 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 + 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 + 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 + 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 + 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 + 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 + 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 + 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 + 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 + 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 + 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 + 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 + 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 + 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 + 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 + 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 + 2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 + 2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 + 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 + 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 + 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 + 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 + 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 + 2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 + 2019-04-01 06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 + 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 + 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 + 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 + 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 + 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 + 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 + 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 + 2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 + 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 + 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 + 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 + 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 + 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 + 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 + 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) + self._deterministic_frame = pd.read_csv(text_data, + header=0, + 
skiprows=[1,], + nrows=60, + parse_dates=True, + index_col=0, + float_precision='high', + dtype={'GMT': str}).mul(1000) + return self._deterministic_frame + + def test_cnrfc_credentials(self): + """ + load sensitive info from .env file and test CNRFC credentials exist + """ + load_dotenv() + self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) + + def test_convert_date_columns(self): + """Ensure datetime data converted to string format""" + test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') + self.assertEqual(test_index.tolist()[0], '2019-03-30') + + def test_validate_duration(self): + """ + function to properly format/case hourly or daily durations + """ + duration = 'Hourly' + self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') + + def test_validate_duration_invalid(self): + """ + test that invalid duration raises a ValueError + """ + bad_input = 'monthly' + self.assertRaises(ValueError, + cnrfc.cnrfc._validate_duration, + bad_input) + + def test_get_deterministic_forecast(self): + """ + Test that deterministic forecast start from Graphical_RVF page matches + CSV start of forecast + """ + cnrfc_id = 'FOLC1' + first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] + df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] + first_forecast_entry = df['forecast'].dropna().index.tolist()[0] + + # check that the date/time representation in the timestamp and datetime.datetime objects are the same + self.assertEqual(first_forecast_entry.year, first_ordinate.year) + self.assertEqual(first_forecast_entry.month, first_ordinate.month) + self.assertEqual(first_forecast_entry.day, first_ordinate.day) + self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) + self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) + + # for now, strip the local tzinfo from `first_ordinate` + self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) + + def test_get_deterministic_forecast_watershed(self): + """ + test watershed deterministic forecast download for North San Joaquin on a particular date + """ + df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] + self.assertEqual(df.head(20)['NHGC1'].values.tolist(), + self.deterministic_frame.head(20)['NHGC1'].values.tolist()) + + def test_get_water_year_trend_tabular(self): + """ + test watershed deterministic forecast download for North San Joaquin on a + particular date + """ + df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] + self.assertEqual(df.shape, (365, 9)) + + +# class TestCASGEM(unittest.TestCase): + +# def test(self): +# pass + + +# class TestCAWDL(unittest.TestCase): + +# def test(self): +# pass + + +# class TestCDEC(unittest.TestCase): + +# def test(self): +# pass + + +# class TestCVO(unittest.TestCase): + +# def test(self): +# pass + + +# class TestNID(unittest.TestCase): + +# def test(self): +# pass + + +# class TestSWP(unittest.TestCase): + +# def test(self): +# pass + + +class TestUSACE(unittest.TestCase): + + def test_get_water_year_data(self): + result = wcds.get_water_year_data('buc', 2021, interval='d') + self.assertEqual(result['data'].shape, (397, 16)) + + sample = result['data'].head(4) + self.assertEqual(result['data'].head(4)['Top of Conservation (ac-ft)'].tolist(), + [149521.45, 149042.90, 148564.35, 148085.80]) + + # does not include timezone handling + self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d 
%H:%M:%S'), result['data'].head(4).index.tolist())),
+                         ['2020-08-31 00:00:00',
+                          '2020-09-01 00:00:00',
+                          '2020-09-02 00:00:00',
+                          '2020-09-03 00:00:00'])
+
+        # does not include timezone handling
+        self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].tail(4).index.tolist())),
+                         ['2021-09-28 00:00:00',
+                          '2021-09-29 00:00:00',
+                          '2021-09-30 00:00:00',
+                          '2021-10-01 00:00:00'])
+
+    def test_get_data(self):
+        result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d')
+        self.assertEqual(result['data'].shape, (398, 16))
+        self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0])
+
+    def test_get_wcds_reservoirs(self):
+        """
+        show that 35 reservoirs exist in the internal collect record for WCDS reservoirs
+        """
+        self.assertEqual(wcds.get_wcds_reservoirs().shape[0], 35)
+
+    def test_get_wcds_data(self):
+        result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d')
+        self.assertEqual(result['data'].shape, (398, 16))
+        self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0])
+
+    def test_get_release_report(self):
+        self.assertEqual(wcds.get_release_report('buc')['info']['units'], 'cfs')
+        self.assertGreater(wcds.get_release_report('buc')['data'].shape[0], 0)
+
+    def test_get_reservoir_metadata(self):
+        result = wcds.get_reservoir_metadata('nhg', 2022, interval='d')
+        self.assertEqual(int(result['gross pool (stor)']), 317100)
+        self.assertEqual(int(result['gross pool (elev)']), 713)
+        self.assertTrue('Precip @ Dam (in; elev 712 ft)' in result['data headers'])
+
+
+# class TestUSGS(unittest.TestCase):
+
+#     def test(self):
+#         pass
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collect/usace/wcds.py b/collect/usace/wcds.py
index 4a6f713..05efad5 100644
--- a/collect/usace/wcds.py
+++ b/collect/usace/wcds.py
@@ -7,8 +7,16 @@
 import datetime as dt
 import io
 import re
+import textwrap
 import pandas as pd
+
+import urllib3.contrib.pyopenssl
+urllib3.contrib.pyopenssl.inject_into_urllib3()
+
 import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+import ssl
 
 from collect import utils
 
@@ -32,7 +40,7 @@ def get_water_year_data(reservoir, water_year, interval='d'):
     url = f'https://www.spk-wc.usace.army.mil/plots/csv/{reservoir}{interval}_{water_year}.plot'
 
     # Read url data
-    response = requests.get(url, verify=False).content
+    response = requests.get(url, verify=ssl.CERT_NONE).content
     df = pd.read_csv(io.StringIO(response.decode('utf-8')), header=0, na_values=['-', 'M'])
 
     # Check that user chosen water year is within range with data
@@ -131,42 +139,43 @@ def get_wcds_reservoirs():
     Returns:
         (pandas.DataFrame): dataframe containing table of WCDS reservoirs
     """
-    csv_data = StringIO("""Region|River Basin|Agency|Project|WCDS_ID|Hourly Data|Daily Data
-    Sacramento Valley|Sacramento River|USBR|Shasta Dam & Lake Shasta|SHA|False|True
-    Sacramento Valley|Stony Creek|COE|Black Butte Dam & Lake|BLB|True|True
-    Sacramento Valley|Feather River|DWR|Oroville Dam & LakeOroville|ORO|False|True
-    Sacramento Valley|Yuba River|YCWA|New Bullards Bar Dam & Lake|BUL|False|True
-    Sacramento Valley|Yuba River|COE|Englebright Lake|ENG|True|True
-    Sacramento Valley|N. F. Cache Creek|YCFCWCA|Indian Valley Dam & Reservoir|INV|False|True
-    Sacramento Valley|American River|USBR|Folsom Dam & Lake|FOL|True|True
-    Sacramento Valley|American River|USBR|Folsom Dam & Lake|FOLQ|True|False
-    San Joaquin Valley|Mokelumne River|EBMUD|Camanche Dam & Reservoir|CMN|False|True
-    San Joaquin Valley|Calaveras River|COE|New Hogan Dam & Lake|NHG|True|True
-    San Joaquin Valley|Littlejohn Creek|COE|Farmington Dam & Reservoir|FRM|True|True
-    San Joaquin Valley|Stanislaus River|USBR|New Melones Dam & Lake|NML|False|True
-    San Joaquin Valley|Stanislaus River|USBR|Tulloch Reservoir|TUL|False|True
-    San Joaquin Valley|Tuolumne River|TID|Don Pedro Dam & Lake|DNP|False|True
-    San Joaquin Valley|Merced River|MID|New Exchequer Dam, Lake McClure|EXC|False|True
-    San Joaquin Valley|Los Banos Creek|DWR|Los Banos Detention Reservoir|LBN|False|True
-    San Joaquin Valley|Burns Creek|COE|Burns Dam & Reservoir|BUR|True|True
-    San Joaquin Valley|Bear Creek|COE|Bear Dam & Reservoir|BAR|True|True
-    San Joaquin Valley|Owens Creek|COE|Owens Dam & Reservoir|OWN|True|True
-    San Joaquin Valley|Mariposa Creek|COE|Mariposa Dam & Reservoir|MAR|True|True
-    San Joaquin Valley|Chowchilla River|COE|Buchanan Dam, H.V. Eastman Lake|BUC|True|True
-    San Joaquin Valley|Fresno River|COE|Hidden Dam, Hensley Lake|HID|True|True
-    San Joaquin Valley|San Joaquin River|USBR|Friant Dam, Millerton Lake|MIL|False|True
-    San Joaquin Valley|Big Dry Creek|FMFCD|Big Dry Creek Dam & Reservoir|BDC|False|True
-    Tulare Lake Basin|Kings River|COE|Pine Flat Dam & Lake|PNF|True|True
-    Tulare Lake Basin|Kaweah River|COE|Terminus Dam, Lake Kaweah|TRM|True|True
-    Tulare Lake Basin|Tule River|COE|Success Dam & Lake|SCC|True|True
-    Tulare Lake Basin|Kern River|COE|Isabella Dam & Lake Isabella|ISB|True|True
-    North Coast Area|Russian River|COE|Coyote Valley Dam, Lake Mendocino|COY|True|True
-    North Coast Area|Russian River|COE|Warm Springs Dam, Lake Sonoma|WRS|True|True
-    North Coast Area|Alameda Creek|DWR|Del Valle Dam & Reservoir|DLV|False|True
-    Truckee River Basin|Martis Creek|COE|Martis Creek Dam & Lake|MRT|True|True
-    Truckee River Basin|Prosser Creek|USBR|Prosser Creek Dam & Reservoir|PRS|False|True
-    Truckee River Basin|LittleTruckee River|USBR|Stampede Dam & Reservoir|STP|False|True
-    Truckee River Basin|LittleTruckee River|USBR|Boca Dam & Reservoir|BOC|False|True""")
+    csv_data = io.StringIO(textwrap.dedent("""\
+        Region|River Basin|Agency|Project|WCDS_ID|Hourly Data|Daily Data
+        Sacramento Valley|Sacramento River|USBR|Shasta Dam & Lake Shasta|SHA|False|True
+        Sacramento Valley|Stony Creek|COE|Black Butte Dam & Lake|BLB|True|True
+        Sacramento Valley|Feather River|DWR|Oroville Dam & LakeOroville|ORO|False|True
+        Sacramento Valley|Yuba River|YCWA|New Bullards Bar Dam & Lake|BUL|False|True
+        Sacramento Valley|Yuba River|COE|Englebright Lake|ENG|True|True
+        Sacramento Valley|N. F. Cache Creek|YCFCWCA|Indian Valley Dam & Reservoir|INV|False|True
+        Sacramento Valley|American River|USBR|Folsom Dam & Lake|FOL|True|True
+        Sacramento Valley|American River|USBR|Folsom Dam & Lake|FOLQ|True|False
+        San Joaquin Valley|Mokelumne River|EBMUD|Camanche Dam & Reservoir|CMN|False|True
+        San Joaquin Valley|Calaveras River|COE|New Hogan Dam & Lake|NHG|True|True
+        San Joaquin Valley|Littlejohn Creek|COE|Farmington Dam & Reservoir|FRM|True|True
+        San Joaquin Valley|Stanislaus River|USBR|New Melones Dam & Lake|NML|False|True
+        San Joaquin Valley|Stanislaus River|USBR|Tulloch Reservoir|TUL|False|True
+        San Joaquin Valley|Tuolumne River|TID|Don Pedro Dam & Lake|DNP|False|True
+        San Joaquin Valley|Merced River|MID|New Exchequer Dam, Lake McClure|EXC|False|True
+        San Joaquin Valley|Los Banos Creek|DWR|Los Banos Detention Reservoir|LBN|False|True
+        San Joaquin Valley|Burns Creek|COE|Burns Dam & Reservoir|BUR|True|True
+        San Joaquin Valley|Bear Creek|COE|Bear Dam & Reservoir|BAR|True|True
+        San Joaquin Valley|Owens Creek|COE|Owens Dam & Reservoir|OWN|True|True
+        San Joaquin Valley|Mariposa Creek|COE|Mariposa Dam & Reservoir|MAR|True|True
+        San Joaquin Valley|Chowchilla River|COE|Buchanan Dam, H.V. Eastman Lake|BUC|True|True
+        San Joaquin Valley|Fresno River|COE|Hidden Dam, Hensley Lake|HID|True|True
+        San Joaquin Valley|San Joaquin River|USBR|Friant Dam, Millerton Lake|MIL|False|True
+        San Joaquin Valley|Big Dry Creek|FMFCD|Big Dry Creek Dam & Reservoir|BDC|False|True
+        Tulare Lake Basin|Kings River|COE|Pine Flat Dam & Lake|PNF|True|True
+        Tulare Lake Basin|Kaweah River|COE|Terminus Dam, Lake Kaweah|TRM|True|True
+        Tulare Lake Basin|Tule River|COE|Success Dam & Lake|SCC|True|True
+        Tulare Lake Basin|Kern River|COE|Isabella Dam & Lake Isabella|ISB|True|True
+        North Coast Area|Russian River|COE|Coyote Valley Dam, Lake Mendocino|COY|True|True
+        North Coast Area|Russian River|COE|Warm Springs Dam, Lake Sonoma|WRS|True|True
+        North Coast Area|Alameda Creek|DWR|Del Valle Dam & Reservoir|DLV|False|True
+        Truckee River Basin|Martis Creek|COE|Martis Creek Dam & Lake|MRT|True|True
+        Truckee River Basin|Prosser Creek|USBR|Prosser Creek Dam & Reservoir|PRS|False|True
+        Truckee River Basin|LittleTruckee River|USBR|Stampede Dam & Reservoir|STP|False|True
+        Truckee River Basin|LittleTruckee River|USBR|Boca Dam & Reservoir|BOC|False|True"""))
 
     return pd.read_csv(csv_data, header=0, delimiter='|', index_col='WCDS_ID')
 
@@ -257,6 +266,7 @@ def get_release_report(reservoir):
     # default without dataframe parsing
     return {'data': raw, 'info': info}
 
+
 def get_reservoir_metadata(reservoir, water_year, interval='d'):
     """
     Retrieves website metadata from USACE-SPK's WCDS.
@@ -275,24 +285,29 @@ def get_reservoir_metadata(reservoir, water_year, interval='d'): # USACE-SPK Folsom page url = f'https://www.spk-wc.usace.army.mil/plots/csv/{reservoir}{interval}_{water_year}.meta' - # read url data - response = requests.get(url, verify=False) + # read data from url using requests session with retries + session = requests.Session() + retries = Retry(total=5, + backoff_factor=0.1, + status_forcelist=[500, 502, 503, 504]) + session.mount('https://', HTTPAdapter(max_retries=retries)) + response = session.get(url, verify=ssl.CERT_NONE) # complete metadata dictionary metadata_dict = response.json() - - result = {'data headers': metadata_dict['allheaders'], - 'gross pool (stor)': metadata_dict['ymarkers']['Gross Pool']['value'], - 'generated': metadata_dict['generated'], - 'datum': None - } + result = { + 'data headers': metadata_dict['allheaders'], + 'gross pool (stor)': metadata_dict['ymarkers']['Gross Pool']['value'], + 'generated': metadata_dict['generated'], + 'datum': None + } # check for changing elevation key if 'Gross Pool(elev NGVD29)' in metadata_dict['ymarkers']: - result.update({ 'gross pool (elev)': metadata_dict['ymarkers']['Gross Pool(elev NGVD29)']['value']}) - result.update({'datum': 'NGVD29'}) + result.update({'gross pool (elev)': metadata_dict['ymarkers']['Gross Pool(elev NGVD29)']['value'], + 'datum': 'NGVD29'}) else: - result.update({ 'gross pool (elev)': metadata_dict['ymarkers']['Gross Pool']['value']}) + result.update({'gross pool (elev)': metadata_dict['ymarkers']['Gross Pool(elev)']['value']}) # returns relevant subset as dictionary return result \ No newline at end of file diff --git a/tests/2019040412_N_SanJoaquin_csv_export.csv b/tests/2019040412_N_SanJoaquin_csv_export.csv deleted file mode 100755 index ae6c415..0000000 --- a/tests/2019040412_N_SanJoaquin_csv_export.csv +++ /dev/null @@ -1,363 +0,0 @@ -GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 -,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE -2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 -2019-03-30 13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 -2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 -2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 -2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 -2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 -2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 -2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 -2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 -2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 -2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 -2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 -2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 -2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 -2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 -2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 -2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 -2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 -2019-03-31 
06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 -2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 -2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 -2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 -2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 -2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 -2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 -2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 -2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 -2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 -2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 -2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 -2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 -2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 -2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 -2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 -2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 -2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 -2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 -2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 -2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 -2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 -2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 -2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 -2019-04-01 06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 -2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 -2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 -2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 -2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 -2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 -2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 -2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 -2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 -2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 -2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 -2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 -2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 -2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 -2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 -2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932 -2019-04-01 22:00:00,2.14287,0.49506,0.07301,0.11401,0.83909,0.62407,2.22925,1.95361 -2019-04-01 23:00:00,2.14373,0.52873,0.07301,0.12401,0.83509,0.61807,2.25725,1.95361 -2019-04-02 
00:00:00,2.14457,0.59773,0.07301,0.10101,0.83109,0.61807,2.25725,1.94932 -2019-04-02 01:00:00,2.15408,0.701,0.07401,0.13802,0.83109,0.61807,2.25725,1.95361 -2019-04-02 02:00:00,2.16084,0.66708,0.07301,0.07701,0.83509,0.63007,2.24325,1.95791 -2019-04-02 03:00:00,2.16485,0.59873,0.07101,0.13702,0.83509,0.63007,2.22925,1.95791 -2019-04-02 04:00:00,2.16612,0.59982,0.06601,0.09201,0.83909,0.65307,2.24325,1.96221 -2019-04-02 05:00:00,2.16462,0.59982,0.06201,0.11501,0.83509,0.67608,2.25725,1.95791 -2019-04-02 06:00:00,2.16039,0.59982,0.05701,0.12301,0.83909,0.69908,2.24325,1.95791 -2019-04-02 07:00:00,2.26058,0.49697,0.05201,0.11001,0.85109,0.71908,2.24325,1.95361 -2019-04-02 08:00:00,2.35704,0.56465,0.04901,0.09701,0.8741,0.74408,2.25725,1.95361 -2019-04-02 09:00:00,2.44978,0.70199,0.044,0.10801,0.9021,0.77209,2.27225,1.94932 -2019-04-02 10:00:00,2.5388,0.73541,0.041,0.12201,0.9311,0.80109,2.28625,1.94505 -2019-04-02 11:00:00,2.62409,0.70124,0.038,0.09601,0.95911,0.84509,2.30026,1.94077 -2019-04-02 12:00:00,2.70567,0.63382,0.035,0.02806,0.98811,0.8601,2.38927,1.94077 -2019-04-02 13:00:00,2.92343,0.63499,0.033,0.11001,1.03512,0.8831,2.43427,1.93651 -2019-04-02 14:00:00,3.1496,0.63507,0.031,0.10901,1.07812,0.9261,2.46427,1.94077 -2019-04-02 15:00:00,3.38419,0.63407,0.028,0.10801,1.12112,0.9381,2.58829,1.94077 -2019-04-02 16:00:00,3.62718,0.66941,0.027,0.08901,1.16613,0.97811,2.6523,1.94505 -2019-04-02 17:00:00,3.87859,0.73808,0.025,0.11301,1.20713,1.03612,2.74931,1.95791 -2019-04-02 18:00:00,4.13842,0.73708,0.025,0.10401,1.26214,1.06112,2.83132,1.98819 -2019-04-02 19:00:00,4.14297,0.77133,0.024,0.11501,1.31815,1.09612,2.89732,2.02772 -2019-04-02 20:00:00,4.1457,0.80659,0.023,0.09101,1.38915,1.12213,2.99833,2.07692 -2019-04-02 21:00:00,4.14662,0.77225,0.022,0.11501,1.45816,1.16713,3.08534,2.13637 -2019-04-02 22:00:00,4.14571,0.77125,0.02297,0.02861,1.57818,1.21714,3.15435,2.21627 -2019-04-02 23:00:00,4.14298,0.80651,0.024,0.10301,1.67719,1.21214,3.29537,2.28555 -2019-04-03 00:00:00,4.13842,0.8761,0.024,0.11601,1.73419,1.22214,3.34937,2.36128 -2019-04-03 01:00:00,4.14003,0.87518,0.024,0.10401,1.7922,1.25414,3.53339,2.43905 -2019-04-03 02:00:00,4.141,0.83993,0.023,0.10901,1.8302,1.29714,3.74442,2.52341 -2019-04-03 03:00:00,4.14132,0.87518,0.023,0.11001,1.84621,1.33615,3.84243,2.6194 -2019-04-03 04:00:00,4.14101,0.91043,0.022,0.02746,1.86321,1.36515,4.00245,2.70186 -2019-04-03 05:00:00,4.14004,0.91043,0.022,0.11701,1.87421,1.38515,4.06345,2.77045 -2019-04-03 06:00:00,4.13842,0.90943,0.022,0.10501,1.89621,1.35015,4.20847,2.84348 -2019-04-03 07:00:00,4.12428,0.94369,0.022,0.10501,1.90721,1.35515,4.25047,2.93752 -2019-04-03 08:00:00,4.11008,1.01328,0.022,0.10301,1.90121,1.34515,4.40049,3.0381 -2019-04-03 09:00:00,4.09578,1.01336,0.022,0.09301,1.90721,1.33615,4.4665,3.15857 -2019-04-03 10:00:00,4.08141,1.01336,0.022,0.10501,1.89621,1.29314,4.31448,3.30308 -2019-04-03 11:00:00,4.06696,0.97894,0.022,0.11801,1.88521,1.28314,4.5095,3.42141 -2019-04-03 12:00:00,4.05243,0.94552,0.022,0.02609,1.86821,1.27814,4.40049,3.53851 -2019-04-03 13:00:00,4.01144,0.91218,0.023,0.09001,1.85221,1.25914,4.37949,3.63033 -2019-04-03 14:00:00,3.97045,0.87785,0.023,0.11701,1.8302,1.23114,4.36995,3.71404 -2019-04-03 15:00:00,3.92946,0.9121,0.024,0.10401,1.8142,1.24014,4.35749,3.7841 -2019-04-03 16:00:00,3.88846,0.87785,0.025,0.10401,1.8032,1.21714,4.25047,3.82967 -2019-04-03 17:00:00,3.84747,0.87801,0.02598,0.10301,1.7922,1.21214,4.37949,3.88081 -2019-04-03 18:00:00,3.80649,0.87818,0.02697,0.0257,1.7652,1.20813,4.31448,3.91175 
-2019-04-03 19:00:00,3.76532,0.87826,0.02797,0.10401,1.75019,1.19913,4.27148,3.93248 -2019-04-03 20:00:00,3.72414,0.87843,0.02897,0.09001,1.73919,1.17613,4.20847,3.93767 -2019-04-03 21:00:00,3.68297,0.87859,0.02998,0.10401,1.72319,1.18513,4.29348,3.93767 -2019-04-03 22:00:00,3.6418,0.87868,0.031,0.10301,1.69719,1.14913,4.31448,3.92729 -2019-04-03 23:00:00,3.60062,0.84451,0.029,0.11601,1.68719,1.14913,4.14546,3.91175 -2019-04-04 00:00:00,3.55945,0.84468,0.03,0.09301,1.67719,1.14013,4.25047,3.88595 -2019-04-04 01:00:00,3.53991,0.81051,0.0299,0.11601,1.66119,1.13113,4.12546,3.86029 -2019-04-04 02:00:00,3.52036,0.77625,0.0298,0.10501,1.65518,1.10912,3.96144,3.83476 -2019-04-04 03:00:00,3.50083,0.742,0.02973,0.10601,1.63418,1.12713,3.88143,3.81443 -2019-04-04 04:00:00,3.48128,0.70774,0.02969,0.0252,1.61418,1.12213,3.88143,3.7841 -2019-04-04 05:00:00,3.46174,0.743,0.02967,0.11901,1.60818,1.10912,3.78342,3.76397 -2019-04-04 06:00:00,3.44219,0.74392,0.02966,0.09801,1.59318,1.10912,3.76342,3.72896 -2019-04-04 07:00:00,3.43899,0.74392,0.02966,0.12001,1.58818,1.09112,3.70541,3.70411 -2019-04-04 08:00:00,3.43578,0.77825,0.02966,0.09701,1.57818,1.10012,3.5904,3.66953 -2019-04-04 09:00:00,3.43258,0.77825,0.02968,0.12101,1.56717,1.08312,3.68641,3.63521 -2019-04-04 10:00:00,3.42938,0.76685,0.02974,0.10901,1.56217,1.09112,3.68641,3.59628 -2019-04-04 11:00:00,3.42617,0.75583,0.02982,0.12001,1.56217,1.08312,3.64741,3.56249 -2019-04-04 12:00:00,3.42297,0.74513,0.02992,0.02489,1.55717,1.07012,3.5714,3.52895 -2019-04-04 13:00:00,3.41586,0.73476,0.03001,0.02485,1.51371,1.07012,3.6094,3.49508 -2019-04-04 14:00:00,3.40873,0.72469,0.03009,0.02482,1.47031,1.06051,3.5714,3.46862 -2019-04-04 15:00:00,3.40161,0.7149,0.03017,0.02478,1.42738,1.05344,3.51963,3.40148 -2019-04-04 16:00:00,3.39448,0.70538,0.03023,0.02474,1.38508,1.04752,3.46692,3.35061 -2019-04-04 17:00:00,3.38736,0.69616,0.03028,0.02471,1.34324,1.04032,3.41252,3.32781 -2019-04-04 18:00:00,3.38023,0.68725,0.03032,0.02467,1.30145,1.03143,3.36183,3.30576 -2019-04-04 19:00:00,3.36237,0.67866,0.03034,0.02476,1.25927,1.01611,3.31185,3.27278 -2019-04-04 20:00:00,3.3445,0.67039,0.03036,0.02485,1.21683,1.00524,3.25818,3.24343 -2019-04-04 21:00:00,3.32664,0.66238,0.03037,0.02494,1.17467,0.99749,3.20256,3.22211 -2019-04-04 22:00:00,3.30878,0.65477,0.03038,0.02504,1.13308,0.99149,3.14776,3.19368 -2019-04-04 23:00:00,3.29091,0.64787,0.03039,0.02513,1.09202,0.98487,3.09408,3.15809 -2019-04-05 00:00:00,3.27305,0.64178,0.03041,0.02522,1.05133,0.97721,3.03936,3.117 -2019-04-05 01:00:00,3.2783,0.63668,0.03045,0.02519,1.04604,0.96594,2.98461,3.0723 -2019-04-05 02:00:00,3.28355,0.63265,0.0305,0.02515,1.04104,0.9659,2.93128,3.02564 -2019-04-05 03:00:00,3.2888,0.62945,0.03058,0.02512,1.03668,0.95795,2.87719,2.97714 -2019-04-05 04:00:00,3.29405,0.62647,0.03066,0.02509,1.03316,0.95229,2.84445,2.92653 -2019-04-05 05:00:00,3.2993,0.62327,0.03074,0.02505,1.03054,0.94825,2.82578,2.87426 -2019-04-05 06:00:00,3.30455,0.61972,0.03081,0.02502,1.02895,0.94548,2.80829,2.82106 -2019-04-05 07:00:00,3.30761,0.61567,0.03087,0.0249,1.02861,0.94418,2.79422,2.76766 -2019-04-05 08:00:00,3.31067,0.61105,0.03092,0.02477,1.0294,0.94529,2.78084,2.71366 -2019-04-05 09:00:00,3.31374,0.60616,0.03095,0.02465,1.03088,0.94639,2.76319,2.65941 -2019-04-05 10:00:00,3.3168,0.60081,0.03096,0.02453,1.0327,0.94725,2.7479,2.60508 -2019-04-05 11:00:00,3.31986,0.59464,0.03097,0.0244,1.03478,0.94784,2.73831,2.55365 -2019-04-05 12:00:00,3.32292,0.58757,0.03096,0.02428,1.03726,0.94819,2.73303,2.50933 -2019-04-05 
13:00:00,3.30875,0.57957,0.03093,0.02426,1.0403,0.94829,2.73032,2.47302 -2019-04-05 14:00:00,3.29458,0.57053,0.03089,0.02424,1.04374,0.94819,2.72786,2.44255 -2019-04-05 15:00:00,3.2804,0.56073,0.03084,0.02423,1.04711,0.94796,2.72267,2.41684 -2019-04-05 16:00:00,3.26623,0.55074,0.03078,0.02421,1.05011,0.94748,2.71685,2.39402 -2019-04-05 17:00:00,3.25206,0.54102,0.03071,0.02419,1.05267,0.94647,2.71394,2.373 -2019-04-05 18:00:00,3.23789,0.53181,0.03066,0.02417,1.05475,0.94456,2.71302,2.35441 -2019-04-05 19:00:00,3.24927,0.52324,0.03061,0.02502,1.0568,0.94091,2.71225,2.34071 -2019-04-05 20:00:00,3.26065,0.51548,0.03058,0.02586,1.06005,0.93679,2.71114,2.33025 -2019-04-05 21:00:00,3.27203,0.50846,0.03057,0.02671,1.0666,0.93422,2.74935,2.32253 -2019-04-05 22:00:00,3.2834,0.50272,0.03057,0.02755,1.07877,0.93476,2.79419,2.31639 -2019-04-05 23:00:00,3.29478,0.49932,0.0306,0.0284,1.09907,0.94023,2.81226,2.3109 -2019-04-06 00:00:00,3.30616,0.49923,0.03068,0.02924,1.13247,0.95275,2.82563,2.3062 -2019-04-06 01:00:00,3.35691,0.50757,0.03083,0.0566,1.18488,1.01335,2.86066,2.30478 -2019-04-06 02:00:00,3.40766,0.53523,0.03105,0.08397,1.27912,1.04488,2.93604,2.30455 -2019-04-06 03:00:00,3.45841,0.59439,0.03134,0.11133,1.37284,1.06551,3.08653,2.30518 -2019-04-06 04:00:00,3.50916,0.68795,0.03167,0.13869,1.4476,1.07768,3.18775,2.31197 -2019-04-06 05:00:00,3.55991,0.79103,0.03199,0.16606,1.50576,1.08732,3.35234,2.32973 -2019-04-06 06:00:00,3.61066,0.87861,0.03226,0.19342,1.54982,1.09435,3.51575,2.35245 -2019-04-06 07:00:00,3.60815,0.93395,0.03244,0.18685,1.58023,1.09781,3.63691,2.38818 -2019-04-06 08:00:00,3.60564,0.95917,0.03252,0.18027,1.59787,1.10004,3.69735,2.42565 -2019-04-06 09:00:00,3.60312,0.96424,0.03251,0.1737,1.60522,1.10067,3.73706,2.47323 -2019-04-06 10:00:00,3.60061,0.95553,0.03244,0.16713,1.60477,1.10032,3.75776,2.54453 -2019-04-06 11:00:00,3.5981,0.93648,0.03233,0.16055,1.59694,1.09923,3.76666,2.63643 -2019-04-06 12:00:00,3.59559,0.91006,0.03219,0.15398,1.5788,1.09663,3.76242,2.7451 -2019-04-06 13:00:00,3.56683,0.87901,0.03206,0.14393,1.54671,1.09043,3.74305,2.88412 -2019-04-06 14:00:00,3.53808,0.846,0.03192,0.13387,1.50486,1.08621,3.70127,3.03183 -2019-04-06 15:00:00,3.50933,0.8135,0.03178,0.12382,1.46323,1.08318,3.6558,3.17103 -2019-04-06 16:00:00,3.48057,0.78309,0.03165,0.11376,1.42711,1.0805,3.60816,3.29252 -2019-04-06 17:00:00,3.45181,0.75508,0.03153,0.1037,1.39672,1.07676,3.56049,3.39569 -2019-04-06 18:00:00,3.42306,0.72944,0.03143,0.09365,1.37091,1.0715,3.51322,3.48136 -2019-04-06 19:00:00,3.39462,0.70637,0.03134,0.08868,1.34879,1.0614,3.46671,3.54445 -2019-04-06 20:00:00,3.36617,0.68605,0.03126,0.0837,1.33043,1.05457,3.41877,3.59205 -2019-04-06 21:00:00,3.33773,0.6682,0.03119,0.07873,1.31579,1.04986,3.38377,3.6232 -2019-04-06 22:00:00,3.30929,0.65228,0.03113,0.07376,1.30379,1.04615,3.35003,3.63887 -2019-04-06 23:00:00,3.28084,0.63783,0.03108,0.06878,1.2933,1.04146,3.3204,3.64268 -2019-04-07 00:00:00,3.2524,0.62441,0.03104,0.06381,1.28342,1.03538,3.29279,3.63766 -2019-04-07 01:00:00,3.24039,0.61178,0.031,0.06148,1.27345,1.02428,3.26621,3.61276 -2019-04-07 02:00:00,3.22839,0.59973,0.03096,0.05915,1.26355,1.01682,3.23932,3.58358 -2019-04-07 03:00:00,3.21638,0.58813,0.03093,0.05682,1.25436,1.01187,3.21206,3.55125 -2019-04-07 04:00:00,3.20437,0.57686,0.0309,0.05448,1.24592,1.00843,3.18702,3.51829 -2019-04-07 05:00:00,3.19237,0.56584,0.03087,0.05215,1.23797,1.00465,3.16412,3.48692 -2019-04-07 06:00:00,3.18036,0.55512,0.03084,0.04982,1.23011,1.0004,3.1412,3.45747 -2019-04-07 
07:00:00,3.17218,0.54479,0.03082,0.04819,1.22186,0.99347,3.11832,3.42145 -2019-04-07 08:00:00,3.16401,0.53494,0.0308,0.04656,1.21353,0.98883,3.09561,3.38745 -2019-04-07 09:00:00,3.15583,0.52555,0.03078,0.04493,1.20581,0.98576,3.07181,3.35502 -2019-04-07 10:00:00,3.14765,0.51877,0.03076,0.0433,1.19927,0.98342,3.04962,3.3236 -2019-04-07 11:00:00,3.13948,0.5123,0.03073,0.04167,1.19496,0.98051,3.03014,3.29317 -2019-04-07 12:00:00,3.1313,0.50608,0.03071,0.04004,1.19008,0.97681,3.01196,3.26407 -2019-04-07 13:00:00,3.11471,0.5001,0.03069,0.03944,1.1846,0.97041,2.99489,3.23286 -2019-04-07 14:00:00,3.09811,0.49515,0.03067,0.03884,1.17874,0.9661,2.97964,3.20254 -2019-04-07 15:00:00,3.08152,0.49016,0.03065,0.03825,1.17298,0.96311,2.96149,3.17289 -2019-04-07 16:00:00,3.06492,0.48516,0.03062,0.03765,1.16754,0.96077,2.94358,3.14359 -2019-04-07 17:00:00,3.04833,0.4802,0.0306,0.03705,1.16233,0.95947,2.92763,3.11462 -2019-04-07 18:00:00,3.03173,0.47528,0.03058,0.03645,1.15704,0.95646,2.91236,3.08649 -2019-04-07 19:00:00,3.02021,0.47044,0.03056,0.03617,1.15131,0.95124,2.89747,3.05878 -2019-04-07 20:00:00,3.00869,0.46569,0.03053,0.03589,1.14524,0.94529,2.8828,3.03225 -2019-04-07 21:00:00,2.99717,0.46102,0.03051,0.03561,1.1393,0.94044,2.86556,3.007 -2019-04-07 22:00:00,2.98565,0.45646,0.03049,0.03532,1.13374,0.93683,2.84859,2.98249 -2019-04-07 23:00:00,2.97413,0.45199,0.03047,0.03504,1.1285,0.93388,2.83434,2.95813 -2019-04-08 00:00:00,2.96261,0.44763,0.03045,0.03476,1.12339,0.93096,2.82054,2.93422 -2019-04-08 01:00:00,2.97185,0.44336,0.03043,0.03458,1.11813,0.92748,2.80626,2.91123 -2019-04-08 02:00:00,2.98109,0.4392,0.03041,0.03439,1.11283,0.92401,2.79139,2.88886 -2019-04-08 03:00:00,2.99033,0.43513,0.03039,0.03421,1.10789,0.92139,2.77452,2.86705 -2019-04-08 04:00:00,2.99957,0.43116,0.03036,0.03402,1.10349,0.91975,2.75739,2.84529 -2019-04-08 05:00:00,3.00881,0.42728,0.03034,0.03384,1.09965,0.91896,2.74324,2.82322 -2019-04-08 06:00:00,3.01805,0.42349,0.03032,0.03365,1.09628,0.919,2.73123,2.80139 -2019-04-08 07:00:00,3.03213,0.41979,0.03029,0.0335,1.09332,0.92021,2.72011,2.78115 -2019-04-08 08:00:00,3.04621,0.41618,0.03027,0.03336,1.09077,0.92198,2.70906,2.76133 -2019-04-08 09:00:00,3.0603,0.41266,0.03025,0.03321,1.08862,0.92354,2.69684,2.74168 -2019-04-08 10:00:00,3.07438,0.40922,0.03023,0.03306,1.08681,0.92478,2.68474,2.72176 -2019-04-08 11:00:00,3.08846,0.40586,0.0302,0.03292,1.08528,0.92583,2.67577,2.70134 -2019-04-08 12:00:00,3.10254,0.40257,0.03018,0.03277,1.08396,0.92692,2.67015,2.68094 -2019-04-08 13:00:00,3.09887,0.39936,0.03016,0.03264,1.08279,0.92826,2.66645,2.66284 -2019-04-08 14:00:00,3.09519,0.39623,0.03014,0.03252,1.08173,0.92962,2.66291,2.64571 -2019-04-08 15:00:00,3.09152,0.39317,0.03012,0.03239,1.08073,0.93064,2.65767,2.62941 -2019-04-08 16:00:00,3.08784,0.39018,0.0301,0.03226,1.07975,0.93122,2.65119,2.61354 -2019-04-08 17:00:00,3.08417,0.38726,0.03008,0.03214,1.0787,0.93136,2.64604,2.59783 -2019-04-08 18:00:00,3.08049,0.38441,0.03005,0.03201,1.07746,0.93099,2.64291,2.5827 -2019-04-08 19:00:00,3.07742,0.38162,0.03003,0.03189,1.07592,0.92982,2.64063,2.57039 -2019-04-08 20:00:00,3.07434,0.3789,0.03001,0.03177,1.07419,0.92833,2.63791,2.55978 -2019-04-08 21:00:00,3.07127,0.37623,0.02999,0.03165,1.07261,0.92717,2.63336,2.5507 -2019-04-08 22:00:00,3.0682,0.37368,0.02997,0.03152,1.07144,0.92664,2.62706,2.54247 -2019-04-08 23:00:00,3.06512,0.37132,0.02994,0.0314,1.07086,0.92692,2.62084,2.53444 -2019-04-09 00:00:00,3.06205,0.36921,0.02992,0.03128,1.07108,0.9283,2.61558,2.52658 -2019-04-09 
01:00:00,3.10654,0.36739,0.0299,0.03127,1.07249,0.93164,2.61088,2.52026 -2019-04-09 02:00:00,3.15102,0.36589,0.02988,0.03126,1.07517,0.9371,2.60666,2.51478 -2019-04-09 03:00:00,3.19551,0.36466,0.02986,0.03125,1.07899,0.94662,2.60281,2.50994 -2019-04-09 04:00:00,3.24,0.36374,0.02984,0.03123,1.08397,0.99403,2.60001,2.50525 -2019-04-09 05:00:00,3.28448,0.36334,0.02983,0.03122,1.09046,1.07922,2.59991,2.50015 -2019-04-09 06:00:00,3.32897,0.36359,0.02983,0.03121,1.09945,1.24333,2.60361,2.49451 -2019-04-09 07:00:00,3.41889,0.36459,0.02984,0.03103,1.11316,1.58459,2.61116,2.48908 -2019-04-09 08:00:00,3.50881,0.36646,0.02987,0.03084,1.13326,1.79867,2.62567,2.48371 -2019-04-09 09:00:00,3.59873,0.36899,0.02992,0.03066,1.16122,1.93583,2.66336,2.47854 -2019-04-09 10:00:00,3.68865,0.37167,0.02998,0.03048,1.19971,2.03468,2.74824,2.47361 -2019-04-09 11:00:00,3.77857,0.37413,0.03004,0.03029,1.27958,2.13403,2.93997,2.46906 -2019-04-09 12:00:00,3.86849,0.37619,0.03008,0.03011,1.38334,2.25241,3.24643,2.46533 -2019-04-09 13:00:00,3.92816,0.37773,0.03011,0.03005,1.52818,2.3737,3.54312,2.46375 -2019-04-09 14:00:00,3.98782,0.37866,0.03012,0.02999,1.69921,2.44643,3.81553,2.46433 -2019-04-09 15:00:00,4.04749,0.379,0.03011,0.02993,1.8586,2.47833,4.11348,2.46801 -2019-04-09 16:00:00,4.10716,0.37881,0.03008,0.02987,1.98512,2.47657,4.50509,2.4782 -2019-04-09 17:00:00,4.16682,0.37811,0.03005,0.02981,2.12017,2.44104,4.83879,2.5031 -2019-04-09 18:00:00,4.22649,0.37696,0.03002,0.02975,2.1637,2.33794,5.07894,2.5605 -2019-04-09 19:00:00,4.22055,0.3754,0.03,0.02962,2.13749,2.20162,5.17723,2.67348 -2019-04-09 20:00:00,4.21461,0.37345,0.02998,0.02948,2.07355,2.1055,5.16627,2.84064 -2019-04-09 21:00:00,4.20867,0.37119,0.02998,0.02935,2.00659,2.03412,5.07995,3.04167 -2019-04-09 22:00:00,4.20272,0.36872,0.02998,0.02921,1.96852,1.96935,4.94555,3.26841 -2019-04-09 23:00:00,4.19678,0.36598,0.02999,0.02908,1.92678,1.88959,4.80428,3.53434 -2019-04-10 00:00:00,4.19084,0.36291,0.02999,0.02894,1.87751,1.77571,4.65859,3.83288 -2019-04-10 01:00:00,4.16509,0.35949,0.02998,0.02885,1.81877,1.61983,4.52955,4.09439 -2019-04-10 02:00:00,4.13934,0.35565,0.02995,0.02876,1.75544,1.51304,4.40957,4.32355 -2019-04-10 03:00:00,4.1136,0.35153,0.02992,0.02867,1.69656,1.44343,4.2855,4.50156 -2019-04-10 04:00:00,4.08785,0.34735,0.02987,0.02858,1.64504,1.39902,4.14474,4.62009 -2019-04-10 05:00:00,4.0621,0.34319,0.02983,0.02849,1.59907,1.35962,3.98129,4.68103 -2019-04-10 06:00:00,4.03635,0.33907,0.02978,0.0284,1.55536,1.32591,3.81334,4.69336 -2019-04-10 07:00:00,4.00339,0.33502,0.02973,0.02832,1.51098,1.29502,3.66839,4.6687 -2019-04-10 08:00:00,3.97042,0.331,0.02969,0.02824,1.46765,1.27378,3.55299,4.61765 -2019-04-10 09:00:00,3.93746,0.3271,0.02965,0.02816,1.42948,1.2594,3.4581,4.54956 -2019-04-10 10:00:00,3.9045,0.32342,0.02962,0.02808,1.39732,1.24449,3.37436,4.46907 -2019-04-10 11:00:00,3.87153,0.32003,0.02958,0.028,1.36997,1.22032,3.29957,4.37675 -2019-04-10 12:00:00,3.83857,0.31693,0.02955,0.02792,1.34602,1.18409,3.23527,4.27096 -2019-04-10 13:00:00,3.79378,0.3141,0.02953,0.02785,1.32444,1.11485,3.18225,4.15156 -2019-04-10 14:00:00,3.74898,0.31157,0.0295,0.02777,1.30524,1.06804,3.13881,4.02348 -2019-04-10 15:00:00,3.70419,0.30929,0.02948,0.02769,1.28858,1.03683,3.09871,3.89544 -2019-04-10 16:00:00,3.65939,0.30724,0.02945,0.02762,1.27378,1.01723,3.05448,3.75622 -2019-04-10 17:00:00,3.6146,0.30539,0.02943,0.02754,1.26019,0.9994,3.0002,3.63166 -2019-04-10 18:00:00,3.5698,0.30369,0.0294,0.02747,1.24721,0.98258,2.9338,3.5207 -2019-04-10 
19:00:00,3.52714,0.30212,0.02938,0.0274,1.23423,0.96428,2.86817,3.42245 -2019-04-10 20:00:00,3.48448,0.30066,0.02935,0.02733,1.22157,0.95228,2.8162,3.33624 -2019-04-10 21:00:00,3.44182,0.29929,0.02933,0.02726,1.21008,0.9545,2.77594,3.2616 -2019-04-10 22:00:00,3.39916,0.29801,0.02931,0.02719,1.19995,0.94679,2.742,3.19706 -2019-04-10 23:00:00,3.3565,0.29679,0.02929,0.02712,1.19379,0.9406,2.71049,3.13963 -2019-04-11 00:00:00,3.31384,0.29563,0.02926,0.02705,1.18666,0.93449,2.6807,3.08527 -2019-04-11 01:00:00,3.29366,0.29452,0.02924,0.02699,1.17841,0.92722,2.65557,3.03008 -2019-04-11 02:00:00,3.27349,0.29345,0.02922,0.02692,1.16943,0.91994,2.64202,2.97257 -2019-04-11 03:00:00,3.25331,0.29243,0.0292,0.02686,1.16057,0.91423,2.62981,2.91547 -2019-04-11 04:00:00,3.23313,0.29144,0.02918,0.0268,1.15226,0.91008,2.61391,2.86226 -2019-04-11 05:00:00,3.21296,0.29049,0.02916,0.02673,1.1445,0.90675,2.59702,2.81443 -2019-04-11 06:00:00,3.19278,0.28955,0.02914,0.02667,1.137,0.90353,2.58001,2.77169 -2019-04-11 07:00:00,3.18212,0.28865,0.02911,0.02661,1.12942,0.89985,2.56349,2.73329 -2019-04-11 08:00:00,3.17146,0.28776,0.02909,0.02655,1.12184,0.89624,2.54836,2.69882 -2019-04-11 09:00:00,3.1608,0.28689,0.02907,0.02649,1.11469,0.8934,2.5348,2.66949 -2019-04-11 10:00:00,3.15013,0.28605,0.02905,0.02643,1.10813,0.89117,2.52196,2.64566 -2019-04-11 11:00:00,3.13947,0.28522,0.02903,0.02637,1.10203,0.88902,2.50933,2.62542 -2019-04-11 12:00:00,3.12881,0.2844,0.02901,0.02631,1.09609,0.88635,2.49724,2.60709 -2019-04-11 13:00:00,3.10656,0.2836,0.02899,0.02626,1.08992,0.88257,2.48591,2.59005 -2019-04-11 14:00:00,3.08431,0.28281,0.02897,0.0262,1.08358,0.87851,2.4756,2.57392 -2019-04-11 15:00:00,3.06205,0.28202,0.02895,0.02615,1.07742,0.87513,2.46609,2.55873 -2019-04-11 16:00:00,3.0398,0.28126,0.02893,0.02609,1.07156,0.87229,2.45643,2.5446 -2019-04-11 17:00:00,3.01755,0.2805,0.02891,0.02604,1.06587,0.86929,2.44617,2.53156 -2019-04-11 18:00:00,2.9953,0.27976,0.02889,0.02598,1.06001,0.86521,2.4355,2.51944 -2019-04-11 19:00:00,2.97179,0.27903,0.02887,0.02593,1.05355,0.85921,2.42479,2.50814 -2019-04-11 20:00:00,2.94829,0.27831,0.02885,0.02588,1.04659,0.85277,2.41456,2.49756 -2019-04-11 21:00:00,2.92478,0.27761,0.02883,0.02583,1.03969,0.84753,2.40474,2.48776 -2019-04-11 22:00:00,2.90128,0.27691,0.02881,0.02577,1.03311,0.84335,2.39435,2.47878 -2019-04-11 23:00:00,2.87777,0.27623,0.02879,0.02572,1.02678,0.83955,2.38266,2.47047 -2019-04-12 00:00:00,2.85427,0.27555,0.02876,0.02567,1.02037,0.83506,2.36973,2.46253 -2019-04-12 01:00:00,2.84502,0.27488,0.02874,0.02562,1.01348,0.82915,2.35622,2.45476 -2019-04-12 02:00:00,2.83577,0.27422,0.02872,0.02558,1.00626,0.82301,2.34333,2.44702 -2019-04-12 03:00:00,2.82652,0.27356,0.02871,0.02553,0.99927,0.81822,2.33138,2.43935 -2019-04-12 04:00:00,2.81726,0.27291,0.02869,0.02548,0.9928,0.81477,2.31955,2.43187 -2019-04-12 05:00:00,2.80801,0.27227,0.02866,0.02544,0.98681,0.81205,2.30705,2.42451 -2019-04-12 06:00:00,2.79876,0.27164,0.02864,0.02539,0.98108,0.80946,2.29385,2.41694 -2019-04-12 07:00:00,2.79623,0.27101,0.02862,0.02534,0.97534,0.8067,2.28052,2.40893 -2019-04-12 08:00:00,2.7937,0.27039,0.0286,0.0253,0.96965,0.80413,2.2682,2.4003 -2019-04-12 09:00:00,2.79118,0.26977,0.02857,0.02525,0.96431,0.80215,2.25736,2.39124 -2019-04-12 10:00:00,2.78865,0.26917,0.02855,0.0252,0.95942,0.8006,2.2474,2.38209 -2019-04-12 11:00:00,2.78612,0.26858,0.02853,0.02516,0.95488,0.7991,2.23776,2.37303 -2019-04-12 12:00:00,2.78359,0.26799,0.02851,0.02511,0.95049,0.79719,2.2285,2.36395 -2019-04-12 
13:00:00,2.76815,0.26741,0.02849,0.02507,0.94598,0.79458,2.21991,2.35472 -2019-04-12 14:00:00,2.7527,0.26684,0.02847,0.02503,0.94139,0.79185,2.21218,2.34522 -2019-04-12 15:00:00,2.73725,0.26628,0.02845,0.02499,0.93694,0.78958,2.20523,2.33565 -2019-04-12 16:00:00,2.72181,0.26572,0.02842,0.02494,0.9327,0.78758,2.19181,2.32641 -2019-04-12 17:00:00,2.70636,0.26516,0.0284,0.0249,0.92857,0.78531,2.17264,2.31771 -2019-04-12 18:00:00,2.69092,0.26462,0.02838,0.02486,0.92427,0.78199,2.15731,2.30958 -2019-04-12 19:00:00,2.67428,0.26407,0.02837,0.02482,0.91947,0.77719,2.14627,2.302 -2019-04-12 20:00:00,2.65763,0.26353,0.02835,0.02478,0.91427,0.77209,2.13788,2.29492 -2019-04-12 21:00:00,2.64099,0.263,0.02833,0.02474,0.90909,0.76803,2.13176,2.28844 -2019-04-12 22:00:00,2.62435,0.26247,0.0283,0.0247,0.90415,0.76487,2.12729,2.2826 -2019-04-12 23:00:00,2.6077,0.26195,0.02828,0.02466,0.89941,0.76199,2.12311,2.27315 -2019-04-13 00:00:00,2.59106,0.26143,0.02826,0.02462,0.89464,0.75859,2.11786,2.26171 -2019-04-13 01:00:00,2.59219,0.26091,0.02824,0.02459,0.88955,0.75437,2.1114,2.24859 -2019-04-13 02:00:00,2.59332,0.2604,0.02822,0.02455,0.88426,0.75023,2.10469,2.23543 -2019-04-13 03:00:00,2.59446,0.25988,0.0282,0.02452,0.87921,0.74713,2.09825,2.22322 -2019-04-13 04:00:00,2.59559,0.25937,0.02818,0.02448,0.87461,0.74503,2.09159,2.21239 -2019-04-13 05:00:00,2.59672,0.25887,0.02816,0.02445,0.87046,0.74365,2.08403,2.20312 -2019-04-13 06:00:00,2.59785,0.25837,0.02814,0.02441,0.86666,0.74279,2.07545,2.19522 -2019-04-13 07:00:00,2.60719,0.25788,0.02812,0.02438,0.86308,0.74242,2.06655,2.18821 -2019-04-13 08:00:00,2.61654,0.2574,0.0281,0.02434,0.85975,0.74233,2.05835,2.18152 -2019-04-13 09:00:00,2.62588,0.25693,0.02808,0.02431,0.85676,0.74234,2.05133,2.17482 -2019-04-13 10:00:00,2.63522,0.25647,0.02806,0.02427,0.85412,0.74234,2.04527,2.16812 -2019-04-13 11:00:00,2.64457,0.25601,0.02804,0.02424,0.85176,0.74229,2.0399,2.16145 -2019-04-13 12:00:00,2.65391,0.25556,0.02802,0.0242,0.84958,0.74212,2.03531,2.15462 -2019-04-13 13:00:00,2.64623,0.2551,0.028,0.02417,0.84747,0.74181,2.03157,2.14742 -2019-04-13 14:00:00,2.63854,0.25465,0.02798,0.02413,0.8454,0.74145,2.02845,2.13976 -2019-04-13 15:00:00,2.63086,0.2542,0.02796,0.0241,0.84341,0.74105,2.02565,2.13181 -2019-04-13 16:00:00,2.62318,0.25375,0.02794,0.02407,0.84149,0.74048,2.02285,2.12393 -2019-04-13 17:00:00,2.61549,0.25331,0.02792,0.02403,0.83955,0.73952,2.0199,2.11643 -2019-04-13 18:00:00,2.60781,0.25287,0.0279,0.024,0.83741,0.73774,2.01691,2.1094 -2019-04-13 19:00:00,2.59324,0.25243,0.02789,0.02397,0.83488,0.73496,2.01401,2.10294 -2019-04-13 20:00:00,2.57867,0.25199,0.02787,0.02394,0.83201,0.73202,2.01122,2.09711 -2019-04-13 21:00:00,2.5641,0.25156,0.02785,0.02392,0.82908,0.72965,2.00829,2.09194 -2019-04-13 22:00:00,2.54954,0.25113,0.02783,0.02389,0.82623,0.72776,2.00478,2.08737 -2019-04-13 23:00:00,2.53497,0.2507,0.02781,0.02386,0.82345,0.726,2.00025,2.0833 -2019-04-14 00:00:00,2.5204,0.25028,0.02779,0.02383,0.82061,0.72387,1.99461,2.07954 -2019-04-14 01:00:00,2.52282,0.24987,0.02777,0.0238,0.81752,0.72123,1.98842,2.07598 -2019-04-14 02:00:00,2.52524,0.24945,0.02774,0.02377,0.81427,0.71868,1.98246,2.07258 -2019-04-14 03:00:00,2.52767,0.24904,0.02772,0.02375,0.81115,0.7168,1.97698,2.06932 -2019-04-14 04:00:00,2.53009,0.24863,0.0277,0.02372,0.80834,0.71559,1.97165,2.06615 -2019-04-14 05:00:00,2.53251,0.24822,0.02768,0.02369,0.80585,0.71489,1.96594,2.06294 -2019-04-14 06:00:00,2.53493,0.24781,0.02766,0.02366,0.80362,0.71469,1.95975,2.05948 -2019-04-14 
07:00:00,2.54488,0.24741,0.02764,0.02363,0.8016,0.71497,1.95358,2.05553 -2019-04-14 08:00:00,2.55483,0.24701,0.02762,0.02361,0.79981,0.71545,1.94806,2.05102 -2019-04-14 09:00:00,2.56478,0.24661,0.0276,0.02359,0.79825,0.71586,1.94345,2.04588 -2019-04-14 10:00:00,2.57473,0.24622,0.02758,0.02356,0.79689,0.71613,1.93958,2.04033 -2019-04-14 11:00:00,2.58468,0.24583,0.02756,0.02353,0.79569,0.71626,1.93635,2.03482 -2019-04-14 12:00:00,2.59463,0.24544,0.02754,0.02351,0.79458,0.71622,1.93386,2.02929 diff --git a/tests/test_cnrfc.py b/tests/test_cnrfc.py deleted file mode 100644 index c48af9c..0000000 --- a/tests/test_cnrfc.py +++ /dev/null @@ -1,83 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import json - -from mock import patch -import pandas as pd -from pandas.util.testing import assert_frame_equal -import pytest - -from collect.cnrfc import * - - -@pytest.fixture() -def df(): - return pd.read_csv('tests/2019040412_N_SanJoaquin_csv_export.csv', - header=0, - skiprows=[1,], - nrows=60, - parse_dates=True, - index_col=0, - float_precision='high', - dtype={'GMT': str}) * 1000.0 - - -def test_cnrfc_credentials(): - """ - load sensitive info from .env file and test CNRFC credentials exist - """ - load_dotenv() - assert ('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ) - - -def test_convert_date_columns(df): - """Ensure datetime data converted to string format""" - df.index = df.index.strftime('%Y-%m-%d') - assert df.index.tolist()[0] == '2019-03-30' - - -def test_validate_duration(): - """ - function to properly format/case hourly or daily durations - """ - duration = 'Hourly' - assert validate_duration(duration) == 'hourly' - - -def test_validate_duration_invalid(): - """ - test that invalid duration raises a ValueError - """ - bad_input = 'monthly' - with pytest.raises(ValueError): - validate_duration(bad_input) - - -def test_get_deterministic_forecast(): - """ - Test that deterministic forecast start from Graphical_RVF page matches - CSV start of forecast - """ - cnrfc_id = 'FOLC1' - first_ordinate = get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] - df = get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] - assert df['forecast'].dropna().index.tolist()[0] == first_ordinate - - -def test_get_deterministic_forecast_watershed(df): - """ - test watershed deterministic forecast download for North San Joaquin on a - particular date - """ - frame = get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] - assert_frame_equal(df, frame) - -# @patch('collect.class_instance.custom_method') -# def test_custom_class_method(display, data): -# """Assert that show calls the mocked custom_method function -# """ -# class_instance = ClassName(arg1, -# arg2, -# arg3) -# class_instance.some_other_method() -# custom_method.assert_called_once() From efe59980bfe7e25798b9d071c8e6b2adff94546a Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Mon, 27 Nov 2023 11:47:19 -0800 Subject: [PATCH 05/36] Add placeholder tests --- collect/tests/test_basics.py | 367 ++++++++++++++++++++++++++++++++--- 1 file changed, 341 insertions(+), 26 deletions(-) diff --git a/collect/tests/test_basics.py b/collect/tests/test_basics.py index dce2cbb..67a2a7c 100644 --- a/collect/tests/test_basics.py +++ b/collect/tests/test_basics.py @@ -25,12 +25,13 @@ from collect import cvo from collect import nid from collect import usgs +from collect import utils from collect.usace import wcds class TestSacAlert(unittest.TestCase): - def test_alert_get_site_notes(self): + def 
test_get_site_notes(self): """ test the function for retrieving site metadata produces the expected entries """ @@ -40,8 +41,13 @@ def test_alert_get_site_notes(self): self.assertEqual(result['Location:'], 'Upstream of Alpine Frost Dr. west of Bruceville Rd.') self.assertEqual(result['Date Installed:'], '2/6/1994') - def test_alert_get_data(self): - result = alert.get_data('1137', dt.datetime(2021, 3, 18, 14), dt.datetime(2021, 3, 18, 20), device_ids=[4]) + def test_get_data(self): + result = alert.get_data('1137', + dt.datetime(2021, 3, 18, 14), + dt.datetime(2021, 3, 18, 20), + device_ids=[4], + ascending=True, + as_dataframe=True) # check the queried sensor values for the specified date range self.assertEqual(result['data']['Value'].tolist(), @@ -51,17 +57,33 @@ def test_alert_get_data(self): self.assertEqual(result['data']['Receive'].tolist()[:4], ['2021-03-18 14:00:25', '2021-03-18 14:36:20', '2021-03-18 15:00:30', '2021-03-18 15:24:21']) - def test_alert_get_site_sensors(self): + def test_get_site_sensors(self): """ test the function for retrieving site metadata sensors list produces the expected number of entries """ self.assertEqual(len(alert.get_site_sensors(1122)['sensors']), 7) - def test_alert_get_sites(self): + def test_get_sites(self): """ test the function for retrieving site list for a particular gage types returns the expected number of entries """ self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (81, 12)) + # self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (81, 12)) + + def test_get_sites_from_list(self): + alert.get_sites_from_list(as_dataframe=True, sensor_class=None) + + def test_ustrip(self): + alert._ustrip(x) + + def test_get_site_location(self): + alert.get_site_location(site_id) + + def test_get_query_url(self): + alert.get_query_url(site_id, device_id, start, end) + + def test_get_device_series(self): + alert.get_device_series(site_id, device_id, start, end, ascending=True) class TestCNRFC(unittest.TestCase): @@ -207,41 +229,310 @@ def test_get_water_year_trend_tabular(self): df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] self.assertEqual(df.shape, (365, 9)) + def test_get_seasonal_trend_tabular(self): + cnrfc.get_seasonal_trend_tabular(cnrfc_id, water_year) + + def test_get_water_year_trend_tabular(self): + cnrfc.get_water_year_trend_tabular(cnrfc_id, water_year) + + def test_get_deterministic_forecast(self): + cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=False) + + def test_get_deterministic_forecast_watershed(self): + cnrfc.get_deterministic_forecast_watershed(watershed, + date_string, + acre_feet=False, + pdt_convert=False, + as_pdt=False, + cnrfc_id=None) + + def test_get_forecast_meta_deterministic(self): + cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=False) + + def test_get_ensemble_forecast(self): + cnrfc.get_ensemble_forecast(cnrfc_id, duration, acre_feet=False, pdt_convert=False, as_pdt=False) + + def test_get_ensemble_forecast_watershed(self): + cnrfc.get_ensemble_forecast_watershed(watershed, + duration, + date_string, + acre_feet=False, + pdt_convert=False, + as_pdt=False, + cnrfc_id=None) + + def test_download_watershed_file(self): + cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + + def test_get_watershed_forecast_issue_time(self): + cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) + + def 
test_get_watershed(self): + cnrfc.get_watershed(cnrfc_id) + + def test_get_ensemble_first_forecast_ordinate(self): + cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + + def test_get_ensemble_product_url(self): + cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') + + def test_get_ensemble_product_1(self): + cnrfc.get_ensemble_product_1(cnrfc_id) + + def test_get_ensemble_product_2(self): + cnrfc.get_ensemble_product_2(cnrfc_id) + + def test_get_ensemble_product_3(self): + cnrfc.get_ensemble_product_3(cnrfc_id) + + def test_get_ensemble_product_5(self): + cnrfc.get_ensemble_product_5(cnrfc_id) + + def test_get_ensemble_product_6(self): + cnrfc.get_ensemble_product_6(cnrfc_id) + + def test_get_ensemble_product_10(self): + cnrfc.get_ensemble_product_10(cnrfc_id) + + def test_get_ensemble_product_11(self): + cnrfc.get_ensemble_product_11(cnrfc_id) -# class TestCASGEM(unittest.TestCase): + def test_get_ensemble_product_12(self): + cnrfc.get_ensemble_product_12(cnrfc_id) -# def test(self): -# pass + def test_get_ensemble_product_13(self): + cnrfc.get_ensemble_product_13(cnrfc_id) + def test_get_data_report_part_8(self): + cnrfc.get_data_report_part_8() -# class TestCAWDL(unittest.TestCase): + def test_get_monthly_reservoir_storage_summary(self): + cnrfc.get_monthly_reservoir_storage_summary() -# def test(self): -# pass + def test_esp_trace_analysis_wrapper(self): + cnrfc.esp_trace_analysis_wrapper() + def test__apply_conversions(self): + cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) -# class TestCDEC(unittest.TestCase): + def test__get_cnrfc_restricted_content(self): + cnrfc._get_cnrfc_restricted_content(url) -# def test(self): -# pass + def test__get_forecast_csv(self): + cnrfc._get_forecast_csv(url) + def test_get_forecast_csvdata(self): + cnrfc.get_forecast_csvdata(url) -# class TestCVO(unittest.TestCase): + def test_get_rating_curve(self): + cnrfc.get_rating_curve(cnrfc_id) -# def test(self): -# pass + def test__default_date_string(self): + cnrfc._default_date_string(date_string) + def test__parse_blue_table(self): + cnrfc._parse_blue_table(table_soup) -# class TestNID(unittest.TestCase): -# def test(self): -# pass +class TestCASGEM(unittest.TestCase): + + def test_get_casgem_data(self): + casgem.get_casgem_data(casgem_id=None, + state_well_number=None, + local_well_designation=None, + master_site_code=None, + write_to_html_file=False) + + +class TestCAWDL(unittest.TestCase): + + def test_get_cawdl_data(self): + cawdl.get_cawdl_data(site_id) + + def test_get_cawdl_surface_water_data(self): + cawdl.get_cawdl_surface_water_data(site_id, water_year, variable, interval=None) + + def test_get_cawdl_surface_water_por(self): + cawdl.get_cawdl_surface_water_por(site_id, variable, interval=None) + + def test_get_cawdl_surface_water_site_report(self): + cawdl.get_cawdl_surface_water_site_report(site_id) + + +class TestCDEC(unittest.TestCase): + + def test_get_b120_data(self): + b120.get_b120_data(date_suffix='') + + def test_validate_date_suffix(self): + b120.validate_date_suffix(date_suffix, min_year=2017) + + def test_clean_td(self): + b120.clean_td(text) + + def test_get_b120_update_data(self): + b120.get_b120_update_data(date_suffix='') + + def test_get_120_archived_reports(self): + b120.get_120_archived_reports(year, month) + + def test_april_july_dataframe(self): + b120.april_july_dataframe(data_list) + + def test_get_station_url(self): + cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') + + def 
test_get_station_sensors(self): + cdec.get_station_sensors(station, start, end) + + def test_get_station_data(self): + cdec.get_station_data(station, start, end, sensors=[], duration='') + + def test_get_raw_station_csv(self): + cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') + + def test_get_raw_station_json(self): + cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') + + def test_get_sensor_frame(self): + cdec.get_sensor_frame(station, start, end, sensor='', duration='') + + def test_get_station_metadata(self): + cdec.get_station_metadata(station, as_geojson=False) + + def test_get_dam_metadata(self): + cdec.get_dam_metadata(station) + + def test_get_reservoir_metadata(self): + cdec.get_reservoir_metadata(station) + + def test__get_table_index(self): + cdec._get_table_index(table_type, tables) + def test__parse_station_generic_table(self): + cdec._parse_station_generic_table(table) -# class TestSWP(unittest.TestCase): + def test__parse_station_sensors_table(self): + cdec._parse_station_sensors_table(table) -# def test(self): -# pass + def test__parse_station_comments_table(self): + cdec._parse_station_comments_table(table) + + def test__parse_data_available(self): + cdec._parse_data_available(text) + + def test_get_data(self): + cdec.get_data(station, start, end, sensor='', duration='') + + def test_get_daily_snowpack_data(self): + cdec.get_daily_snowpack_data(region, start, end) + + +class TestCVO(unittest.TestCase): + + def test(self): + pass + + # prn test + result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2011, 3, 31), 'doutdly') + + # pdf test + result = cvo.get_data(dt.date(2013, 12, 1), dt.date(2014, 1, 31), 'doutdly') + result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2023, 5, 1), 'shafln') + result = cvo.get_data(dt.date(2012, 6, 1), dt.date(2013, 12, 31), 'slunit') + result = cvo.get_data(dt.date(2020, 6, 1), dt.date(2021, 1, 1), 'fedslu') + result = cvo.get_data(dt.date(2021, 1, 10), dt.date.now(), 'shadop') + result = cvo.get_data(dt.date(2023, 5, 1), dt.date.now(), 'kesdop') + + def test_get_area(self): + cvo.get_area(date_structure, report_type) + + def test_get_data(self): + cvo.get_data(start, end, report_type) + + def test_get_date_published(self): + cvo.get_date_published(url, date_structure, report_type) + + def test_get_report_columns(self): + cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False) + + def test_get_report(self): + cvo.get_report(date_structure, report_type) + + def test_get_title(self): + cvo.get_title(report_type) + + def test_get_url(self): + cvo.get_url(date_structure, report_type) + + def test_months_between(self): + cvo.months_between(start_date, end_date) + + def test_doutdly_data_cleaner(self): + cvo.doutdly_data_cleaner(content, report_type, date_structure) + + def test_load_pdf_to_dataframe(self): + cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False) + + def test_download_files(self): + cvo.download_files(start, end, report_type, destination='.') + + +class TestNID(unittest.TestCase): + + def test_get_sites(self): + nid.get_sites() + + def test_get_issue_date(self): + nid.get_issue_date() + + def test_get_site_files(self): + nid.get_site_files(site) + + def test_get_site_metric(self): + nid.get_site_metric(site, interval='daily') + + def test_get_station_url(self): + nid.get_station_url(site, metric='index', interval=None) + + def test_get_daily_data(self): + nid.get_daily_data(site, json_compatible=False) + + 
def test_get_daily_meta(self): + nid.get_daily_meta(url=None, content=None) + + def test_get_hourly_data(self): + nid.get_hourly_data(site, json_compatible=False) + + def test_parse_qualifiers(self): + nid.parse_qualifiers(series) + + def test_serialize(self): + nid.serialize(df, day_format='%Y-%-m-%-d') + + +class TestSWP(unittest.TestCase): + + def test_prompt_installation_and_exit(self): + swp.prompt_installation_and_exit() + + def test_get_report_catalog(self): + swp.get_report_catalog() + + def test_get_report_url(self): + swp.get_report_url() + + def test_get_raw_text(self): + swp.get_raw_text() + + def test_get_delta_daily_data(self): + swp.get_delta_daily_data() + + def test_get_barker_slough_data(self): + swp.get_barker_slough_data() + + def test_get_oco_tabular_data(self): + swp.get_oco_tabular_data() class TestUSACE(unittest.TestCase): @@ -295,10 +586,34 @@ def test_get_reservoir_metadata(self): self.assertTrue('Precip @ Dam (in; elev 712 ft)' in result['data headers']) -# class TestUSGS(unittest.TestCase): +class TestUSGS(unittest.TestCase): + + def test_get_query_url(self): + usgs.get_query_url(station_id, sensor, start_time, end_time, interval) + + def test_get_data(self): + usgs.get_data(station_id, sensor, start_time, end_time, interval='instantaneous') + + def test_get_usgs_data(self): + usgs.get_usgs_data(station_id, sensor, start_time, end_time, interval='instantaneous') + + def test_get_peak_streamflow(self): + usgs.get_peak_streamflow(station_id) + + +class TestUtils(unittest.TestCase): + + def test_get_session_response(self): + utils.get_session_response(url) + + def test_get_web_status(self): + utils.get_web_status(url) + + def test_clean_fixed_width_headers(self): + utils.clean_fixed_width_headers(columns) -# def test(self): -# pass + def test_get_water_year(self): + utils.get_water_year(datetime_structure) if __name__ == '__main__': From 9f68b187e15b256b909ea2bfe55875131b555b9b Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Mon, 27 Nov 2023 11:56:20 -0800 Subject: [PATCH 06/36] Start Python 3.10+ pytz to zoneinfo conversion --- collect/alert/alert.py | 2 +- collect/cnrfc/cnrfc.py | 13 +- collect/tests/test_basics.py | 485 +++++++++++++++++++---------------- collect/usgs/usgs.py | 13 +- collect/utils/utils.py | 6 +- 5 files changed, 289 insertions(+), 230 deletions(-) diff --git a/collect/alert/alert.py b/collect/alert/alert.py index 4906a72..c7dfc48 100644 --- a/collect/alert/alert.py +++ b/collect/alert/alert.py @@ -33,7 +33,7 @@ def _ustrip(x): """ - strips whitespace represented by unicode non-breaking space in additon to default white + strips whitespace represented by unicode non-breaking space in addition to default white space stripping by python's str.strip() method Arguments: x (str): string containing an encoded whitespace diff --git a/collect/cnrfc/cnrfc.py b/collect/cnrfc/cnrfc.py index 3a9ffe4..a1142d3 100644 --- a/collect/cnrfc/cnrfc.py +++ b/collect/cnrfc/cnrfc.py @@ -13,15 +13,22 @@ from dateutil import parser from dotenv import load_dotenv import pandas as pd -import pytz import requests from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry from collect.cnrfc.gages import * from collect.utils.utils import clean_fixed_width_headers, get_web_status -UTC = pytz.timezone('UTC') -PACIFIC = pytz.timezone('America/Los_Angeles') +try: + from zoneinfo import ZoneInfo + UTC = ZoneInfo('UTC') + PACIFIC = ZoneInfo('America/Los_Angeles') + +except: + import pytz + UTC = pytz.timezone('UTC') + PACIFIC = 
pytz.timezone('America/Los_Angeles') + TODAY = dt.datetime.now().strftime('%Y%m%d') diff --git a/collect/tests/test_basics.py b/collect/tests/test_basics.py index 67a2a7c..e5a3bde 100644 --- a/collect/tests/test_basics.py +++ b/collect/tests/test_basics.py @@ -68,22 +68,46 @@ def test_get_sites(self): test the function for retrieving site list for a particular gage types returns the expected number of entries """ self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (81, 12)) - # self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (81, 12)) + self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (37, 10)) def test_get_sites_from_list(self): - alert.get_sites_from_list(as_dataframe=True, sensor_class=None) + self.assertEqual(alert.get_sites_from_list(as_dataframe=True, sensor_class=None).shape, (128, 4)) def test_ustrip(self): - alert._ustrip(x) + self.assertEqual(alert.alert._ustrip('\u00A0'), '') def test_get_site_location(self): - alert.get_site_location(site_id) + result = alert.get_site_location(1122) + self.assertEqual(result['latitude'], 38.6024722) + self.assertEqual(result['longitude'], -121.3951389) def test_get_query_url(self): - alert.get_query_url(site_id, device_id, start, end) + url = alert.get_query_url(1137, 3, dt.datetime(2023, 1, 1), dt.datetime(2023, 2, 1)) + expected_url = '&'.join([ + 'https://www.sacflood.org/export/file/?site_id=1137', + 'device_id=3', + 'mode=', + 'hours=', + 'data_start=2023-01-01%2000:00:00', + 'data_end=2023-02-01%2000:00:00', + 'tz=US%2FPacific', + 'format_datetime=%25Y-%25m-%25d+%25H%3A%25i%3A%25S', + 'mime=txt', + 'delimiter=comma' + ]) + self.assertEqual(url, expected_url) def test_get_device_series(self): - alert.get_device_series(site_id, device_id, start, end, ascending=True) + result = alert.get_device_series(1108, + 6, + dt.datetime(2023, 11, 27), + dt.datetime(2023, 11, 28), + ascending=True).head(4).values.tolist() + expected_result = [['2023-11-27 00:00:00', '2023-11-27 00:31:05', 50.38, 'ft', 'A'], + ['2023-11-27 00:15:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A'], + ['2023-11-27 00:30:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A'], + ['2023-11-27 00:45:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A']] + self.assertEqual(result, expected_result) class TestCNRFC(unittest.TestCase): @@ -229,310 +253,311 @@ def test_get_water_year_trend_tabular(self): df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] self.assertEqual(df.shape, (365, 9)) - def test_get_seasonal_trend_tabular(self): - cnrfc.get_seasonal_trend_tabular(cnrfc_id, water_year) + # def test_get_seasonal_trend_tabular(self): + # cnrfc.get_seasonal_trend_tabular(cnrfc_id, water_year) - def test_get_water_year_trend_tabular(self): - cnrfc.get_water_year_trend_tabular(cnrfc_id, water_year) + # def test_get_water_year_trend_tabular(self): + # cnrfc.get_water_year_trend_tabular(cnrfc_id, water_year) - def test_get_deterministic_forecast(self): - cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=False) + # def test_get_deterministic_forecast(self): + # cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=False) - def test_get_deterministic_forecast_watershed(self): - cnrfc.get_deterministic_forecast_watershed(watershed, - date_string, - acre_feet=False, - pdt_convert=False, - as_pdt=False, - cnrfc_id=None) + # def test_get_deterministic_forecast_watershed(self): + # cnrfc.get_deterministic_forecast_watershed(watershed, + # date_string, + # 
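(For context on the zoneinfo fallback introduced in cnrfc.py above, here is a minimal sketch of the conversion pattern it enables on Python 3.9+, where zoneinfo ships in the standard library; the timestamp below is illustrative, not taken from CNRFC output:

    import datetime as dt
    from zoneinfo import ZoneInfo

    UTC = ZoneInfo('UTC')
    PACIFIC = ZoneInfo('America/Los_Angeles')

    # attach UTC to a naive forecast stamp, then convert to Pacific time
    stamp = dt.datetime(2023, 11, 27, 12, 0).replace(tzinfo=UTC)
    print(stamp.astimezone(PACIFIC).isoformat())  # 2023-11-27T04:00:00-08:00
)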
acre_feet=False, + # pdt_convert=False, + # as_pdt=False, + # cnrfc_id=None) - def test_get_forecast_meta_deterministic(self): - cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=False) + # def test_get_forecast_meta_deterministic(self): + # cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=False) - def test_get_ensemble_forecast(self): - cnrfc.get_ensemble_forecast(cnrfc_id, duration, acre_feet=False, pdt_convert=False, as_pdt=False) + # def test_get_ensemble_forecast(self): + # cnrfc.get_ensemble_forecast(cnrfc_id, duration, acre_feet=False, pdt_convert=False, as_pdt=False) - def test_get_ensemble_forecast_watershed(self): - cnrfc.get_ensemble_forecast_watershed(watershed, - duration, - date_string, - acre_feet=False, - pdt_convert=False, - as_pdt=False, - cnrfc_id=None) + # def test_get_ensemble_forecast_watershed(self): + # cnrfc.get_ensemble_forecast_watershed(watershed, + # duration, + # date_string, + # acre_feet=False, + # pdt_convert=False, + # as_pdt=False, + # cnrfc_id=None) - def test_download_watershed_file(self): - cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + # def test_download_watershed_file(self): + # cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) - def test_get_watershed_forecast_issue_time(self): - cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) + # def test_get_watershed_forecast_issue_time(self): + # cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) - def test_get_watershed(self): - cnrfc.get_watershed(cnrfc_id) + # def test_get_watershed(self): + # cnrfc.get_watershed(cnrfc_id) - def test_get_ensemble_first_forecast_ordinate(self): - cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + # def test_get_ensemble_first_forecast_ordinate(self): + # cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) - def test_get_ensemble_product_url(self): - cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') + # def test_get_ensemble_product_url(self): + # cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') - def test_get_ensemble_product_1(self): - cnrfc.get_ensemble_product_1(cnrfc_id) + # def test_get_ensemble_product_1(self): + # cnrfc.get_ensemble_product_1(cnrfc_id) - def test_get_ensemble_product_2(self): - cnrfc.get_ensemble_product_2(cnrfc_id) + # def test_get_ensemble_product_2(self): + # cnrfc.get_ensemble_product_2(cnrfc_id) - def test_get_ensemble_product_3(self): - cnrfc.get_ensemble_product_3(cnrfc_id) + # def test_get_ensemble_product_3(self): + # cnrfc.get_ensemble_product_3(cnrfc_id) - def test_get_ensemble_product_5(self): - cnrfc.get_ensemble_product_5(cnrfc_id) + # def test_get_ensemble_product_5(self): + # cnrfc.get_ensemble_product_5(cnrfc_id) - def test_get_ensemble_product_6(self): - cnrfc.get_ensemble_product_6(cnrfc_id) + # def test_get_ensemble_product_6(self): + # cnrfc.get_ensemble_product_6(cnrfc_id) - def test_get_ensemble_product_10(self): - cnrfc.get_ensemble_product_10(cnrfc_id) + # def test_get_ensemble_product_10(self): + # cnrfc.get_ensemble_product_10(cnrfc_id) - def test_get_ensemble_product_11(self): - cnrfc.get_ensemble_product_11(cnrfc_id) + # def test_get_ensemble_product_11(self): + # cnrfc.get_ensemble_product_11(cnrfc_id) - def test_get_ensemble_product_12(self): - cnrfc.get_ensemble_product_12(cnrfc_id) + # def 
test_get_ensemble_product_12(self): + # cnrfc.get_ensemble_product_12(cnrfc_id) - def test_get_ensemble_product_13(self): - cnrfc.get_ensemble_product_13(cnrfc_id) + # def test_get_ensemble_product_13(self): + # cnrfc.get_ensemble_product_13(cnrfc_id) - def test_get_data_report_part_8(self): - cnrfc.get_data_report_part_8() + # def test_get_data_report_part_8(self): + # cnrfc.get_data_report_part_8() - def test_get_monthly_reservoir_storage_summary(self): - cnrfc.get_monthly_reservoir_storage_summary() + # def test_get_monthly_reservoir_storage_summary(self): + # cnrfc.get_monthly_reservoir_storage_summary() - def test_esp_trace_analysis_wrapper(self): - cnrfc.esp_trace_analysis_wrapper() + # def test_esp_trace_analysis_wrapper(self): + # cnrfc.esp_trace_analysis_wrapper() - def test__apply_conversions(self): - cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) + # def test__apply_conversions(self): + # cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) - def test__get_cnrfc_restricted_content(self): - cnrfc._get_cnrfc_restricted_content(url) + # def test__get_cnrfc_restricted_content(self): + # cnrfc._get_cnrfc_restricted_content(url) - def test__get_forecast_csv(self): - cnrfc._get_forecast_csv(url) + # def test__get_forecast_csv(self): + # cnrfc._get_forecast_csv(url) - def test_get_forecast_csvdata(self): - cnrfc.get_forecast_csvdata(url) + # def test_get_forecast_csvdata(self): + # cnrfc.get_forecast_csvdata(url) - def test_get_rating_curve(self): - cnrfc.get_rating_curve(cnrfc_id) + # def test_get_rating_curve(self): + # cnrfc.get_rating_curve(cnrfc_id) - def test__default_date_string(self): - cnrfc._default_date_string(date_string) + # def test__default_date_string(self): + # cnrfc._default_date_string(date_string) - def test__parse_blue_table(self): - cnrfc._parse_blue_table(table_soup) + # def test__parse_blue_table(self): + # cnrfc._parse_blue_table(table_soup) -class TestCASGEM(unittest.TestCase): +# class TestCASGEM(unittest.TestCase): - def test_get_casgem_data(self): - casgem.get_casgem_data(casgem_id=None, - state_well_number=None, - local_well_designation=None, - master_site_code=None, - write_to_html_file=False) + # def test_get_casgem_data(self): + # result = casgem.get_casgem_data(casgem_id=None, + # state_well_number=None, + # local_well_designation=None, + # master_site_code=None, + # write_to_html_file=False) + # print(result) -class TestCAWDL(unittest.TestCase): +# class TestCAWDL(unittest.TestCase): - def test_get_cawdl_data(self): - cawdl.get_cawdl_data(site_id) + # def test_get_cawdl_data(self): + # cawdl.get_cawdl_data(site_id) - def test_get_cawdl_surface_water_data(self): - cawdl.get_cawdl_surface_water_data(site_id, water_year, variable, interval=None) + # def test_get_cawdl_surface_water_data(self): + # cawdl.get_cawdl_surface_water_data(site_id, water_year, variable, interval=None) - def test_get_cawdl_surface_water_por(self): - cawdl.get_cawdl_surface_water_por(site_id, variable, interval=None) + # def test_get_cawdl_surface_water_por(self): + # cawdl.get_cawdl_surface_water_por(site_id, variable, interval=None) - def test_get_cawdl_surface_water_site_report(self): - cawdl.get_cawdl_surface_water_site_report(site_id) + # def test_get_cawdl_surface_water_site_report(self): + # cawdl.get_cawdl_surface_water_site_report(site_id) -class TestCDEC(unittest.TestCase): +# class TestCDEC(unittest.TestCase): - def test_get_b120_data(self): - b120.get_b120_data(date_suffix='') + # def test_get_b120_data(self): + # 
b120.get_b120_data(date_suffix='') - def test_validate_date_suffix(self): - b120.validate_date_suffix(date_suffix, min_year=2017) + # def test_validate_date_suffix(self): + # b120.validate_date_suffix(date_suffix, min_year=2017) - def test_clean_td(self): - b120.clean_td(text) + # def test_clean_td(self): + # b120.clean_td(text) - def test_get_b120_update_data(self): - b120.get_b120_update_data(date_suffix='') + # def test_get_b120_update_data(self): + # b120.get_b120_update_data(date_suffix='') - def test_get_120_archived_reports(self): - b120.get_120_archived_reports(year, month) + # def test_get_120_archived_reports(self): + # b120.get_120_archived_reports(year, month) - def test_april_july_dataframe(self): - b120.april_july_dataframe(data_list) + # def test_april_july_dataframe(self): + # b120.april_july_dataframe(data_list) - def test_get_station_url(self): - cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') + # def test_get_station_url(self): + # cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') - def test_get_station_sensors(self): - cdec.get_station_sensors(station, start, end) + # def test_get_station_sensors(self): + # cdec.get_station_sensors(station, start, end) - def test_get_station_data(self): - cdec.get_station_data(station, start, end, sensors=[], duration='') + # def test_get_station_data(self): + # cdec.get_station_data(station, start, end, sensors=[], duration='') - def test_get_raw_station_csv(self): - cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') + # def test_get_raw_station_csv(self): + # cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') - def test_get_raw_station_json(self): - cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') + # def test_get_raw_station_json(self): + # cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') - def test_get_sensor_frame(self): - cdec.get_sensor_frame(station, start, end, sensor='', duration='') + # def test_get_sensor_frame(self): + # cdec.get_sensor_frame(station, start, end, sensor='', duration='') - def test_get_station_metadata(self): - cdec.get_station_metadata(station, as_geojson=False) + # def test_get_station_metadata(self): + # cdec.get_station_metadata(station, as_geojson=False) - def test_get_dam_metadata(self): - cdec.get_dam_metadata(station) + # def test_get_dam_metadata(self): + # cdec.get_dam_metadata(station) - def test_get_reservoir_metadata(self): - cdec.get_reservoir_metadata(station) + # def test_get_reservoir_metadata(self): + # cdec.get_reservoir_metadata(station) - def test__get_table_index(self): - cdec._get_table_index(table_type, tables) + # def test__get_table_index(self): + # cdec._get_table_index(table_type, tables) - def test__parse_station_generic_table(self): - cdec._parse_station_generic_table(table) + # def test__parse_station_generic_table(self): + # cdec._parse_station_generic_table(table) - def test__parse_station_sensors_table(self): - cdec._parse_station_sensors_table(table) + # def test__parse_station_sensors_table(self): + # cdec._parse_station_sensors_table(table) - def test__parse_station_comments_table(self): - cdec._parse_station_comments_table(table) + # def test__parse_station_comments_table(self): + # cdec._parse_station_comments_table(table) - def test__parse_data_available(self): - cdec._parse_data_available(text) + # def test__parse_data_available(self): + # 
cdec._parse_data_available(text) - def test_get_data(self): - cdec.get_data(station, start, end, sensor='', duration='') + # def test_get_data(self): + # cdec.get_data(station, start, end, sensor='', duration='') - def test_get_daily_snowpack_data(self): - cdec.get_daily_snowpack_data(region, start, end) + # def test_get_daily_snowpack_data(self): + # cdec.get_daily_snowpack_data(region, start, end) -class TestCVO(unittest.TestCase): +# class TestCVO(unittest.TestCase): - def test(self): - pass + # def test(self): + # pass - # prn test - result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2011, 3, 31), 'doutdly') + # # prn test + # result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2011, 3, 31), 'doutdly') - # pdf test - result = cvo.get_data(dt.date(2013, 12, 1), dt.date(2014, 1, 31), 'doutdly') - result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2023, 5, 1), 'shafln') - result = cvo.get_data(dt.date(2012, 6, 1), dt.date(2013, 12, 31), 'slunit') - result = cvo.get_data(dt.date(2020, 6, 1), dt.date(2021, 1, 1), 'fedslu') - result = cvo.get_data(dt.date(2021, 1, 10), dt.date.now(), 'shadop') - result = cvo.get_data(dt.date(2023, 5, 1), dt.date.now(), 'kesdop') + # # pdf test + # result = cvo.get_data(dt.date(2013, 12, 1), dt.date(2014, 1, 31), 'doutdly') + # result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2023, 5, 1), 'shafln') + # result = cvo.get_data(dt.date(2012, 6, 1), dt.date(2013, 12, 31), 'slunit') + # result = cvo.get_data(dt.date(2020, 6, 1), dt.date(2021, 1, 1), 'fedslu') + # result = cvo.get_data(dt.date(2021, 1, 10), dt.date.now(), 'shadop') + # result = cvo.get_data(dt.date(2023, 5, 1), dt.date.now(), 'kesdop') - def test_get_area(self): - cvo.get_area(date_structure, report_type) + # def test_get_area(self): + # cvo.get_area(date_structure, report_type) - def test_get_data(self): - cvo.get_data(start, end, report_type) + # def test_get_data(self): + # cvo.get_data(start, end, report_type) - def test_get_date_published(self): - cvo.get_date_published(url, date_structure, report_type) + # def test_get_date_published(self): + # cvo.get_date_published(url, date_structure, report_type) - def test_get_report_columns(self): - cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False) + # def test_get_report_columns(self): + # cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False) - def test_get_report(self): - cvo.get_report(date_structure, report_type) + # def test_get_report(self): + # cvo.get_report(date_structure, report_type) - def test_get_title(self): - cvo.get_title(report_type) + # def test_get_title(self): + # cvo.get_title(report_type) - def test_get_url(self): - cvo.get_url(date_structure, report_type) + # def test_get_url(self): + # cvo.get_url(date_structure, report_type) - def test_months_between(self): - cvo.months_between(start_date, end_date) + # def test_months_between(self): + # cvo.months_between(start_date, end_date) - def test_doutdly_data_cleaner(self): - cvo.doutdly_data_cleaner(content, report_type, date_structure) + # def test_doutdly_data_cleaner(self): + # cvo.doutdly_data_cleaner(content, report_type, date_structure) - def test_load_pdf_to_dataframe(self): - cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False) + # def test_load_pdf_to_dataframe(self): + # cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False) - def test_download_files(self): - cvo.download_files(start, end, report_type, destination='.') + # def test_download_files(self): + 
# cvo.download_files(start, end, report_type, destination='.') -class TestNID(unittest.TestCase): +# class TestNID(unittest.TestCase): - def test_get_sites(self): - nid.get_sites() + # def test_get_sites(self): + # nid.get_sites() - def test_get_issue_date(self): - nid.get_issue_date() + # def test_get_issue_date(self): + # nid.get_issue_date() - def test_get_site_files(self): - nid.get_site_files(site) + # def test_get_site_files(self): + # nid.get_site_files(site) - def test_get_site_metric(self): - nid.get_site_metric(site, interval='daily') + # def test_get_site_metric(self): + # nid.get_site_metric(site, interval='daily') - def test_get_station_url(self): - nid.get_station_url(site, metric='index', interval=None) + # def test_get_station_url(self): + # nid.get_station_url(site, metric='index', interval=None) - def test_get_daily_data(self): - nid.get_daily_data(site, json_compatible=False) + # def test_get_daily_data(self): + # nid.get_daily_data(site, json_compatible=False) - def test_get_daily_meta(self): - nid.get_daily_meta(url=None, content=None) + # def test_get_daily_meta(self): + # nid.get_daily_meta(url=None, content=None) - def test_get_hourly_data(self): - nid.get_hourly_data(site, json_compatible=False) + # def test_get_hourly_data(self): + # nid.get_hourly_data(site, json_compatible=False) - def test_parse_qualifiers(self): - nid.parse_qualifiers(series) + # def test_parse_qualifiers(self): + # nid.parse_qualifiers(series) - def test_serialize(self): - nid.serialize(df, day_format='%Y-%-m-%-d') + # def test_serialize(self): + # nid.serialize(df, day_format='%Y-%-m-%-d') -class TestSWP(unittest.TestCase): +# class TestSWP(unittest.TestCase): - def test_prompt_installation_and_exit(self): - swp.prompt_installation_and_exit() + # def test_prompt_installation_and_exit(self): + # swp.prompt_installation_and_exit() - def test_get_report_catalog(self): - swp.get_report_catalog() + # def test_get_report_catalog(self): + # swp.get_report_catalog() - def test_get_report_url(self): - swp.get_report_url() + # def test_get_report_url(self): + # swp.get_report_url() - def test_get_raw_text(self): - swp.get_raw_text() + # def test_get_raw_text(self): + # swp.get_raw_text() - def test_get_delta_daily_data(self): - swp.get_delta_daily_data() + # def test_get_delta_daily_data(self): + # swp.get_delta_daily_data() - def test_get_barker_slough_data(self): - swp.get_barker_slough_data() + # def test_get_barker_slough_data(self): + # swp.get_barker_slough_data() - def test_get_oco_tabular_data(self): - swp.get_oco_tabular_data() + # def test_get_oco_tabular_data(self): + # swp.get_oco_tabular_data() class TestUSACE(unittest.TestCase): @@ -589,31 +614,49 @@ def test_get_reservoir_metadata(self): class TestUSGS(unittest.TestCase): def test_get_query_url(self): - usgs.get_query_url(station_id, sensor, start_time, end_time, interval) + url = usgs.get_query_url(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), 'instantaneous') + expected_url = '&'.join(['https://waterservices.usgs.gov/nwis/iv/?format=json', + 'sites=11418500', + 'startDT=2023-01-01T00:00:00', + 'endDT=2023-01-05T00:00:00', + 'parameterCd=00060', + 'siteStatus=all']) + self.assertEqual(url, expected_url) def test_get_data(self): - usgs.get_data(station_id, sensor, start_time, end_time, interval='instantaneous') + result = usgs.get_data(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), interval='daily') + self.assertEqual(result['data']['00060'].tolist(), [1280.0, 341.0, 351.0, 260.0, 1790.0]) + 
self.assertEqual(result['data'].index.strftime('%Y-%m-%d').tolist(), + ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']) def test_get_usgs_data(self): - usgs.get_usgs_data(station_id, sensor, start_time, end_time, interval='instantaneous') + result = usgs.get_usgs_data(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), interval='daily') + self.assertEqual(result['data']['00060'].tolist(), [1280.0, 341.0, 351.0, 260.0, 1790.0]) + self.assertEqual(result['data'].index.strftime('%Y-%m-%d').tolist(), + ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']) def test_get_peak_streamflow(self): - usgs.get_peak_streamflow(station_id) + result = usgs.get_peak_streamflow(11418500)['data'][['peak_va']] + self.assertEqual(result.head()['peak_va'].tolist(), + ['14000', '6260', '7520', '10800', '2400']) + self.assertEqual(result.head().index.strftime('%Y-%m-%d').tolist(), + ['1928-02-29', '1936-02-21', '1937-02-04', '1937-12-11', '1939-03-08']) class TestUtils(unittest.TestCase): - def test_get_session_response(self): - utils.get_session_response(url) + # def test_get_session_response(self): + # utils.get_session_response(url) - def test_get_web_status(self): - utils.get_web_status(url) + # def test_get_web_status(self): + # utils.get_web_status(url) - def test_clean_fixed_width_headers(self): - utils.clean_fixed_width_headers(columns) + # def test_clean_fixed_width_headers(self): + # utils.clean_fixed_width_headers(columns) def test_get_water_year(self): - utils.get_water_year(datetime_structure) + self.assertEqual(utils.get_water_year(dt.datetime(2023, 5, 12)), 2023) + self.assertEqual(utils.get_water_year(dt.datetime(2023, 11, 12)), 2024) if __name__ == '__main__': diff --git a/collect/usgs/usgs.py b/collect/usgs/usgs.py index 01dc8b3..ed4e8bd 100644 --- a/collect/usgs/usgs.py +++ b/collect/usgs/usgs.py @@ -4,7 +4,7 @@ USGS National Water Information System (NWIS) """ # -*- coding: utf-8 -*- - +import datetime as dt from bs4 import BeautifulSoup import dateutil.parser import pandas as pd @@ -121,16 +121,21 @@ def get_peak_streamflow(station_id): url = '?'.join(['https://nwis.waterdata.usgs.gov/nwis/peak', 'site_no={station_id}&agency_cd=USGS&format=rdb']).format(station_id=station_id) + def leap_filter(x): + if x.split('-', 1)[-1] == '03-00': + x = x.replace('03-00', '02-29') + return dt.datetime.strptime(x, '%Y-%m-%d') + # process annual peak time series from tab-delimited table frame = pd.read_csv(url, comment='#', - parse_dates=True, + parse_dates=False, header=0, delimiter='\t') frame.drop(0, axis=0, inplace=True) - frame.index = pd.to_datetime(frame['peak_dt']) + frame.index = pd.to_datetime(frame['peak_dt'].apply(leap_filter)) - # load USGS site informatiokn + # load USGS site information result = BeautifulSoup(requests.get(url.rstrip('rdb')).content, 'lxml') info = {'site number': station_id, 'site name': result.find('h2').text} meta = result.findAll('div', {'class': 'leftsidetext'})[0] diff --git a/collect/utils/utils.py b/collect/utils/utils.py index 5e7a02c..29ef8a9 100644 --- a/collect/utils/utils.py +++ b/collect/utils/utils.py @@ -4,6 +4,10 @@ The utilities module of MBK Engineers' collect project """ # -*- coding: utf-8 -*- +import urllib3.contrib.pyopenssl +urllib3.contrib.pyopenssl.inject_into_urllib3() +import ssl + import requests from requests.packages.urllib3.util.retry import Retry from requests.adapters import HTTPAdapter @@ -21,7 +25,7 @@ def get_session_response(url): backoff_factor=0.1, status_forcelist=[500, 502, 503, 
504]) session.mount('https://', HTTPAdapter(max_retries=retries)) - return session.get(url, verify=False) + return session.get(url, verify=ssl.CERT_NONE) def get_web_status(url): From d49a357d46b595876180a50f4e01a6c2103aab6c Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Mon, 27 Nov 2023 18:56:19 -0800 Subject: [PATCH 07/36] NID adjustments for new calendar year data format; additional test support --- collect/nid/nid.py | 32 +++-- collect/tests/test_basics.py | 223 ++++++++++++++++++++++++++++------- 2 files changed, 198 insertions(+), 57 deletions(-) diff --git a/collect/nid/nid.py b/collect/nid/nid.py index 3e6da12..ca458b9 100644 --- a/collect/nid/nid.py +++ b/collect/nid/nid.py @@ -36,7 +36,8 @@ from bs4 import BeautifulSoup import pandas as pd -import requests + +from collect import utils def get_sites(): @@ -46,8 +47,8 @@ def get_sites(): Returns: sites (dict): dictionary of site IDs and titles """ - url = "https://river-lake.nidwater.com/hyquick/index.htm" - df = pd.read_html(requests.get(url).content, header=1, index_col=0)[0] + url = 'https://river-lake.nidwater.com/hyquick/index.htm' + df = pd.read_html(utils.get_session_response(url).content, header=1, index_col=0)[0] sites = df.to_dict()['Name'] return sites @@ -59,8 +60,8 @@ def get_issue_date(): Returns: issue_date (datetime.datetime): the last update of the NID hyquick page """ - url = "https://river-lake.nidwater.com/hyquick/index.htm" - df = pd.read_html(requests.get(url).content, header=None)[0] + url = 'https://river-lake.nidwater.com/hyquick/index.htm' + df = pd.read_html(utils.get_session_response(url).content, header=None)[0] return dt.datetime.strptime(df.iloc[0, 1], 'Run on %Y/%m/%d %H:%M:%S') @@ -73,7 +74,7 @@ def get_site_files(site): links (list): sorted list of linked files available for site """ url = get_station_url(site, metric='index') - soup = BeautifulSoup(requests.get(url).content, 'lxml') + soup = BeautifulSoup(utils.get_session_response(url).content, 'lxml') links = {a.get('href') for a in soup.find_all('a')} return sorted(links) @@ -127,7 +128,7 @@ def get_daily_data(site, json_compatible=False): """ metric = get_site_metric(site, interval='daily') url = get_station_url(site, metric=metric, interval='daily') - response = requests.get(url).text + response = utils.get_session_response(url).text frames = [] for group in re.split(r'(?=Nevada Irrigation District\s+)', response): @@ -136,23 +137,27 @@ def get_daily_data(site, json_compatible=False): continue # split by start of table header line - pre_table, table = re.split(r'(?=Day\s{2,}OCT)', group) + pre_table, table = re.split(r'(?=Day\s{2,}JAN)', group) # get water year, site info for water year table meta = get_daily_meta(content=pre_table) # load water year table to dataframe - data = pd.read_fwf(io.StringIO(re.split(r'\nTotal', table)[0]), + data = pd.read_fwf(io.StringIO(re.split(r'\nMax', table)[0]), header=0, - skiprows=[1], + skiprows=[1], nrows=36, na_values=['------', 'NaN', '']).dropna(how='all') # convert from monthly table to water-year series df = pd.melt(data, id_vars='Day').rename({'variable': 'month', 'value': metric}, axis=1) + # defunct for report formatted by water year + # df['year'] = df['month'].apply(lambda x: meta['water_year'] -1 if x in ['OCT', 'NOV', 'DEC'] + # else meta['water_year']) + # assign calendar year to each entry - df['year'] = df['month'].apply(lambda x: meta['water_year'] -1 if x in ['OCT', 'NOV', 'DEC'] else meta['water_year']) + df['year'] = meta['year'] df.index = df['Day'].astype(str) + df['month'] + 
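(A quick illustration of the leap_filter helper added to usgs.py above; this is a sketch under the patch's assumption that NWIS peak-streamflow tables encode an unresolved leap day as a '-03-00' date string:

    import datetime as dt

    def leap_filter(x):
        # NWIS RDB peak dates such as '1940-03-00' stand in for 1940-02-29
        if x.split('-', 1)[-1] == '03-00':
            x = x.replace('03-00', '02-29')
        return dt.datetime.strptime(x, '%Y-%m-%d')

    print(leap_filter('1940-03-00'))  # 1940-02-29 00:00:00
    print(leap_filter('1937-12-11'))  # 1937-12-11 00:00:00
)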
df['year'].astype(str) # drop non-existent date entries (i.e. 31NOVYYYY) @@ -172,6 +177,7 @@ def get_daily_data(site, json_compatible=False): 'district': meta['district'], 'version': meta['version'], 'report_stamp': meta['report_stamp'], + 'year': meta['year'], 'url': url, 'metric': metric, 'timeseries_type': {'flow': 'flows', 'volume': 'storages'}.get(metric), @@ -188,7 +194,7 @@ def get_daily_meta(url=None, content=None): """ if url: data = [re.sub(r'\s{2,}|:\s+|:', '|', x.strip()).split('|') - for x in requests.get(url).text.splitlines()[:10]] + for x in utils.get_session_response(url).text.splitlines()[:10]] elif content: data = [re.sub(r'\s{2,}|:\s+|:', '|', x.strip()).split('|') for x in content.splitlines()] @@ -199,7 +205,7 @@ def get_daily_meta(url=None, content=None): result.update({row[0]: row[1]}) # extract water year from end date entry - result.update({'water_year': dt.datetime.strptime(result['Ending Date'], '%m/%d/%Y').year}) + result.update({'year': dt.datetime.strptime(result['Ending Date'], '%m/%d/%Y').year}) return result diff --git a/collect/tests/test_basics.py b/collect/tests/test_basics.py index e5a3bde..cde976d 100644 --- a/collect/tests/test_basics.py +++ b/collect/tests/test_basics.py @@ -48,7 +48,7 @@ def test_get_data(self): device_ids=[4], ascending=True, as_dataframe=True) - + # check the queried sensor values for the specified date range self.assertEqual(result['data']['Value'].tolist(), [0.0, 0.04, 0.0, 0.04, 0.04, 0.0, 0.0, 0.04, 0.0, 0.04, 0.04, 0.04, 0.0, 0.04, 0.0]) @@ -188,7 +188,7 @@ def deterministic_frame(self): float_precision='high', dtype={'GMT': str}).mul(1000) return self._deterministic_frame - + def test_cnrfc_credentials(self): """ load sensitive info from .env file and test CNRFC credentials exist @@ -207,7 +207,7 @@ def test_validate_duration(self): """ duration = 'Hourly' self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - + def test_validate_duration_invalid(self): """ test that invalid duration raises a ValueError @@ -247,8 +247,7 @@ def test_get_deterministic_forecast_watershed(self): def test_get_water_year_trend_tabular(self): """ - test watershed deterministic forecast download for North San Joaquin on a - particular date + test watershed deterministic forecast download for North San Joaquin on a particular date """ df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] self.assertEqual(df.shape, (365, 9)) @@ -471,69 +470,205 @@ def test_get_water_year_trend_tabular(self): # def test_get_area(self): # cvo.get_area(date_structure, report_type) - + # def test_get_data(self): # cvo.get_data(start, end, report_type) - + # def test_get_date_published(self): # cvo.get_date_published(url, date_structure, report_type) - + # def test_get_report_columns(self): # cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False) - + # def test_get_report(self): # cvo.get_report(date_structure, report_type) - + # def test_get_title(self): # cvo.get_title(report_type) - + # def test_get_url(self): # cvo.get_url(date_structure, report_type) - + # def test_months_between(self): # cvo.months_between(start_date, end_date) - + # def test_doutdly_data_cleaner(self): # cvo.doutdly_data_cleaner(content, report_type, date_structure) - + # def test_load_pdf_to_dataframe(self): # cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False) - + # def test_download_files(self): # cvo.download_files(start, end, report_type, destination='.') -# class TestNID(unittest.TestCase): - - # def 
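(The calendar-year reshaping in get_daily_data above follows a melt-then-reindex pattern; a rough standalone sketch, with made-up values standing in for the fixed-width report parsed by pd.read_fwf:

    import pandas as pd

    # toy stand-in for the Day x month table returned by pd.read_fwf
    data = pd.DataFrame({'Day': [1, 2], 'JAN': [45300, 45800], 'NOV': [42400, 42300]})

    # melt month columns into a single daily series for one calendar year
    df = pd.melt(data, id_vars='Day').rename({'variable': 'month', 'value': 'volume'}, axis=1)
    df['year'] = 2023
    df.index = pd.to_datetime(df['Day'].astype(str) + df['month'] + df['year'].astype(str),
                              format='%d%b%Y', errors='coerce')

    # drop non-existent dates (e.g. 31NOV2023) and sort chronologically
    df = df[df.index.notna()].sort_index()
)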
test_get_sites(self): - # nid.get_sites() - - # def test_get_issue_date(self): - # nid.get_issue_date() - - # def test_get_site_files(self): - # nid.get_site_files(site) - - # def test_get_site_metric(self): - # nid.get_site_metric(site, interval='daily') - - # def test_get_station_url(self): - # nid.get_station_url(site, metric='index', interval=None) - - # def test_get_daily_data(self): - # nid.get_daily_data(site, json_compatible=False) - - # def test_get_daily_meta(self): - # nid.get_daily_meta(url=None, content=None) +class TestNID(unittest.TestCase): - # def test_get_hourly_data(self): - # nid.get_hourly_data(site, json_compatible=False) - - # def test_parse_qualifiers(self): - # nid.parse_qualifiers(series) + @property + def sample_daily_data(self): + if not hasattr(self, '_sample_daily_data'): + self._sample_daily_data = io.StringIO(textwrap.dedent("""\ + Nevada Irrigation District USDAY V123 Output 11/22/2023 + + Summary Report + + Site: DC900 Scott's Flat Reservoir + USGS #: + Beginning Date: 01/01/2023 + Ending Date: 12/31/2023 + + Daily 2400 Storage Volume in Acre-Feet Water Year Jan 2023 to Dec 2023 + + 12/31/2022 44500 + + Day JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC + ------------------------------------------------------------------------------------------------------------------------------------ + 1 45300 48500 48500 48500 48500 47800 47700 45100 42400 40100 + 2 45800 48500 48500 48500 48500 47800 47600 45000 42300 40100 + 3 46200 48500 48500 48500 48500 47900 47500 44900 42300 40000 + 4 46400 48500 48500 48500 48500 48000 47500 44800 42200 40000 + 5 46900 48500 48500 48500 48500 48000 47400 44700 42100 39900 + + 6 47300 48500 48500 48500 48500 48000 47400 44600 42000 39900 + 7 47500 48500 48500 48500 48500 48100 47300 44500 42000 39800 + 8 47800 48500 48500 48500 48500 48000 47200 44400 41900 39800 + 9 48500 48500 48500 48500 48500 48000 47200 44400 41800 39800 + 10 48500 48500 48500 48500 48500 48000 47100 44300 41700 39700 + + 11 48500 48500 48500 48500 48500 48000 47000 44200 41600 39700 + 12 48500 48500 48500 48500 48500 48000 47000 44100 41500 39700 + 13 48500 48500 48500 48500 48400 48100 46900 44000 41400 39600 + 14 48500 48500 48500 48500 48400 48100 46800 43900 41400 39600 + 15 48500 48500 48500 48500 48300 48100 46700 43800 41300 39700 + + 16 48500 48500 48500 48500 48300 48100 46600 43700 41200 39800 + 17 48500 48500 48500 48500 48200 48100 46500 43600 41100 39800 + 18 48500 48500 48500 48500 48100 48000 46400 43500 41000 39900 + 19 48500 48500 48500 48500 48100 48000 46400 43400 40900 40000 + 20 48500 48400 48500 48500 48000 48000 46300 43400 40900 40100 + + 21 48500 48400 48500 48500 47900 48000 46200 43300 40800 40100 + 22 48500 48400 48500 48500 47800 48000 46100 43200 40700 40200 + 23 48500 48300 48500 48500 47800 47900 46000 43200 40600 + 24 48500 48400 48500 48500 47700 47900 45900 43100 40600 + 25 48500 48300 48500 48500 47600 47900 45800 43000 40500 + + 26 48500 48400 48500 48500 47600 47900 45700 42900 40400 + 27 48500 48500 48500 48500 47600 47800 45600 42800 40400 + 28 48500 48500 48500 48500 47600 47800 45500 42700 40300 + 29 48500 ------ 48500 48500 47700 47700 45400 42600 40300 + 30 48500 ------ 48500 48500 47700 47700 45300 42500 40200 + 31 48500 ------ 48500 ------ 47700 ------ 45200 42400 ------ ------ + + Max 48500 48500 48500 48500 48500 48100 47700 45100 42400 40200 + Min 45300 48300 48500 48500 47600 47700 45200 42400 40200 39600 + Change 4000 0 0 0 -800 0 -2500 -2800 -2200 + + Cal Year 2023 Mean 46300 Max 48500 Min 39600 Inst Max 
48500 + + ------------------ Notes ------------------- + All recorded data is continuous and reliable + """)) + return self._sample_daily_data - # def test_serialize(self): - # nid.serialize(df, day_format='%Y-%-m-%-d') + def test_get_sites(self): + result = nid.get_sites() + expected_dict = {'BR100': 'Auburn Ravine I at Head', + 'BR220': 'Hemphill Canal at Head', + 'BR301': 'Combie Phase I at Head', + 'BR334': 'Camp Far West at Head', + 'BR368': 'Gold Hill I at Head', + 'BR900': 'Combie Reservoir-Spill-1600.', + 'BSCA': 'Bowman-Spaulding Canal Intake Near Graniteville, Ca', + 'BWMN': 'Bowman Lake Near Graniteville, Ca', + 'CPFL': 'Chicago Park Flume Near Dutch Flat, Ca', + 'DC102': 'Cascade at Head', + 'DC131': 'Newtown Canal at Head', + 'DC140': 'Tunnel Canal at Head', + 'DC145': 'D. S. Canal at Head', + 'DC169': 'Tarr Canal at Head', + 'DC900': "Scott's Flat Reservoir", + 'DFFL': 'Dutch Flat #2 Flume Near Blue Canyon, Ca', + 'FAUC': 'Faucherie Lake Near Cisco, Ca', + 'FRLK': 'French Lake Near Cisco Grove, Ca', + 'JKSN': 'Jackson Lake near Sierra City', + 'JMDW': 'Jackson Meadows Reservoir Near Sierra City, Ca', + 'MBTO': 'Milton-Bowman Tunnel Outlet (South Portal)', + 'ROLK': 'Rollins Reservoir Near Colfax, Ca', + 'SWML': 'Sawmill Lake Near Graniteville, Ca', + 'WLSN': 'Wilson Creek near Sierra City'} + self.assertEqual(result, expected_dict) + + def test_get_issue_date(self): + result = nid.get_issue_date() + self.assertTrue(isinstance(result, dt.datetime)) + self.assertLess(result, dt.datetime.now()) + + def test_get_site_files(self): + site = 'DC140' + result = nid.get_site_files('DC140') + self.assertEqual(sorted(result), [f'{site}.adesc.pdf', + f'{site}.csv_flow.csv', + f'{site}.plot_flow.png', + f'{site}.usday_daily_flow.txt']) + + def test_get_site_metric(self): + self.assertEqual(nid.get_site_metric('BR334', interval='daily'), 'flow') + + def test_get_station_url(self): + self.assertEqual(nid.get_station_url('ROLK', metric='index', interval=None), + 'https://river-lake.nidwater.com/hyquick/ROLK/index.htm') + + self.assertEqual(nid.get_station_url('ROLK', metric='flow', interval='daily'), + 'https://river-lake.nidwater.com/hyquick/ROLK/ROLK.usday_daily_flow.txt') + + self.assertEqual(nid.get_station_url('ROLK', metric='flow', interval='hourly'), + 'https://river-lake.nidwater.com/hyquick/ROLK/ROLK.csv_flow.csv') + + def test_get_daily_data(self): + result = nid.get_daily_data('DC900', json_compatible=False) + year = result['info']['year'] + self.assertEqual(result['data'].head(4).index.strftime('%Y-%m-%d').tolist(), + [f'{year}-01-01', f'{year}-01-02', f'{year}-01-03', f'{year}-01-04']) + + def test_get_daily_meta(self): + url = 'https://river-lake.nidwater.com/hyquick/DC140/DC140.usday_daily_flow.txt' + result = nid.get_daily_meta(url=url, content=None) + self.assertEqual(result['Site'], 'DC140 Tunnel Canal at Head') + self.assertEqual(result['USGS #'], 'NO') + self.assertEqual(result['version'], 'USDAY V123') + + def test_get_hourly_data(self): + result = nid.get_hourly_data('WLSN', json_compatible=False) + sample = result['data'].head() + self.assertEqual(sample.index.strftime('%Y-%m-%d %H:%M:%S').tolist(), + ['2022-01-01 01:00:00', + '2022-01-01 02:00:00', + '2022-01-01 03:00:00', + '2022-01-01 04:00:00', + '2022-01-01 05:00:00']) + self.assertEqual(sample['Amount Diverted (AF)'].tolist(), [0.15, 0.15, 0.15, 0.15, 0.15]) + + def test_parse_qualifiers(self): + series = pd.Series(data=['Qualities:', + '2 - Good quality edited data', + '22 - Raw Satellite Data', + '28 - Radio 
data', + '255 - No data exists'], + name='Site Information') + self.assertEqual(nid.parse_qualifiers(series), + {'2': 'Good quality edited data', + '22': 'Raw Satellite Data', + '28': 'Radio data', + '255': 'No data exists'}) + + def test_serialize(self): + df = pd.DataFrame(index=pd.date_range('2020-12-01', '2020-12-03', freq='D'), + data={'VALUE': [42] * 3}) + self.assertEqual(nid.serialize(df.copy(), day_format='%Y-%m-%d'), + {'VALUE': {'2020-12-01 00:00': 42, '2020-12-02 00:00': 42, '2020-12-03 00:00': 42}}) + self.assertEqual(nid.serialize(df.copy(), day_format='%Y-%-m-%-d'), + {'VALUE': {'2020-12-1 00:00': 42, '2020-12-2 00:00': 42, '2020-12-3 00:00': 42}}) # class TestSWP(unittest.TestCase): From e3a07073f1b4f16bc55237da4f024f6a6cef5144 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 28 Nov 2023 10:29:29 -0800 Subject: [PATCH 08/36] Update selenium version, which has tools for automatically handling chromedriver --- collect/dwr/casgem/casgem_scraper.py | 39 ++++++++++++---------------- setup.py | 2 +- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/collect/dwr/casgem/casgem_scraper.py b/collect/dwr/casgem/casgem_scraper.py index a98f9bc..d94c76c 100644 --- a/collect/dwr/casgem/casgem_scraper.py +++ b/collect/dwr/casgem/casgem_scraper.py @@ -4,6 +4,7 @@ access CASGEM well data """ # -*- coding: utf-8 -*- +from io import StringIO import os from bs4 import BeautifulSoup import pandas as pd @@ -14,27 +15,22 @@ from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -try: - # Python 2.7 - from StringIO import StringIO -except ModuleNotFoundError: - # Python 3 - from io import StringIO - -def get_casgem_data(casgem_id=None, state_well_number=None, local_well_designation=None, master_site_code=None, write_to_html_file=False): +def get_casgem_data(casgem_id=None, + state_well_number=None, + local_well_designation=None, + master_site_code=None, + write_to_html_file=False): """ Download well timeseries data from CASGEM database; return as dataframe search term | type | example - + ---------------------------------------------------------------------- casgem_id | str | '34318' state_well_number | str | '19N02W36H001M' local_well_designation | str | '19N02W36H001M' master_site_code | str | '394564N1220246W001' - - Args: casgem_id (str): desc state_well_number (None): desc @@ -45,18 +41,15 @@ def get_casgem_data(casgem_id=None, state_well_number=None, local_well_designati Returns: dict """ - - if os.name == 'posix': - chromedriver = '/usr/local/bin/chromedriver' - elif os.name == 'windows': - # update Chromedriver to 2.36 (latest on Win32) - chromedriver = 'C:/Python27/Scripts/chromedriver' - - os.environ['webdriver.chrome.driver'] = chromedriver - - chrome_options = Options() - chrome_options.add_argument('--dns-prefetch-disable') - driver = webdriver.Chrome(chrome_options=chrome_options) + # if os.name == 'posix': + # chromedriver = '/usr/local/bin/chromedriver' + # elif os.name == 'windows': + # # update Chromedriver to 2.36 (latest on Win32) + # chromedriver = 'C:/Python27/Scripts/chromedriver' + # os.environ['webdriver.chrome.driver'] = chromedriver + # chrome_options = Options() + # chrome_options.add_argument('--dns-prefetch-disable') + driver = webdriver.Chrome() # fetch log in url url = 'https://www.casgem.water.ca.gov' diff --git a/setup.py b/setup.py index a186bf3..9ae7a02 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ 'python-dotenv==0.19.2', 'requests>=2.26.0', 'scipy>=1.8.0', - 
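(Context for the driver setup simplified in casgem_scraper.py above: Selenium Manager, bundled with selenium 4.6 and later, resolves a compatible chromedriver automatically, so the per-OS driver paths are no longer needed. A minimal sketch, with the option carried over from the old setup shown for illustration:

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    options = Options()
    options.add_argument('--dns-prefetch-disable')

    # Selenium Manager locates a chromedriver matching the installed Chrome
    driver = webdriver.Chrome(options=options)
    driver.get('https://www.casgem.water.ca.gov')
    driver.quit()
)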
'selenium==3.8.0',
+                      'selenium==4.15.2',
                       'tabula-py==2.4.0'],
       extras_require={'docs': ['Sphinx==4.3.0',
                                'sphinx-readable-theme==1.3.0',

From 90f42c6d8b0fa8908fc184b88f098cbb2f1c9861 Mon Sep 17 00:00:00 2001
From: Carly Narlesky
Date: Tue, 28 Nov 2023 10:30:13 -0800
Subject: [PATCH 09/36] Separate tests into files per resource

---
 collect/tests/test_alert.py  |  95 +++++
 collect/tests/test_basics.py | 798 -----------------------------------
 collect/tests/test_cnrfc.py  | 268 ++++++++++++
 collect/tests/test_cvo.py    |  71 ++++
 collect/tests/test_dwr.py    | 184 ++++++++
 collect/tests/test_nid.py    | 186 ++++++++
 collect/tests/test_usace.py  |  65 +++
 collect/tests/test_usgs.py   |  46 ++
 collect/tests/test_utils.py  |  33 ++
 9 files changed, 948 insertions(+), 798 deletions(-)
 create mode 100644 collect/tests/test_alert.py
 delete mode 100644 collect/tests/test_basics.py
 create mode 100644 collect/tests/test_cnrfc.py
 create mode 100644 collect/tests/test_cvo.py
 create mode 100644 collect/tests/test_dwr.py
 create mode 100644 collect/tests/test_nid.py
 create mode 100644 collect/tests/test_usace.py
 create mode 100644 collect/tests/test_usgs.py
 create mode 100644 collect/tests/test_utils.py

diff --git a/collect/tests/test_alert.py b/collect/tests/test_alert.py
new file mode 100644
index 0000000..758e499
--- /dev/null
+++ b/collect/tests/test_alert.py
@@ -0,0 +1,95 @@
+"""
+collect.tests.test_alert
+============================================================
+initial test suite for collect data access and utility functions; note: these tests require internet connection
+"""
+# -*- coding: utf-8 -*-
+import datetime as dt
+import unittest
+import unittest.mock
+from collect import alert
+
+
+class TestSacAlert(unittest.TestCase):
+
+    def test_get_site_notes(self):
+        """
+        test the function for retrieving site metadata produces the expected entries
+        """
+        result = alert.get_site_notes('1137')
+        self.assertEqual(result['site_id'], '1137')
+        self.assertEqual(result['Facility ID:'], 'A31')
+        self.assertEqual(result['Location:'], 'Upstream of Alpine Frost Dr. 
west of Bruceville Rd.') + self.assertEqual(result['Date Installed:'], '2/6/1994') + + def test_get_data(self): + result = alert.get_data('1137', + dt.datetime(2021, 3, 18, 14), + dt.datetime(2021, 3, 18, 20), + device_ids=[4], + ascending=True, + as_dataframe=True) + + # check the queried sensor values for the specified date range + self.assertEqual(result['data']['Value'].tolist(), + [0.0, 0.04, 0.0, 0.04, 0.04, 0.0, 0.0, 0.04, 0.0, 0.04, 0.04, 0.04, 0.0, 0.04, 0.0]) + + # check the associated date/time stamps + self.assertEqual(result['data']['Receive'].tolist()[:4], + ['2021-03-18 14:00:25', '2021-03-18 14:36:20', '2021-03-18 15:00:30', '2021-03-18 15:24:21']) + + def test_get_site_sensors(self): + """ + test the function for retrieving site metadata sensors list produces the expected number of entries + """ + self.assertEqual(len(alert.get_site_sensors(1122)['sensors']), 7) + + def test_get_sites(self): + """ + test the function for retrieving site list for a particular gage types returns the expected number of entries + """ + self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (81, 12)) + self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (37, 10)) + + def test_get_sites_from_list(self): + self.assertEqual(alert.get_sites_from_list(as_dataframe=True, sensor_class=None).shape, (128, 4)) + + def test_ustrip(self): + self.assertEqual(alert.alert._ustrip('\u00A0'), '') + + def test_get_site_location(self): + result = alert.get_site_location(1122) + self.assertEqual(result['latitude'], 38.6024722) + self.assertEqual(result['longitude'], -121.3951389) + + def test_get_query_url(self): + url = alert.get_query_url(1137, 3, dt.datetime(2023, 1, 1), dt.datetime(2023, 2, 1)) + expected_url = '&'.join([ + 'https://www.sacflood.org/export/file/?site_id=1137', + 'device_id=3', + 'mode=', + 'hours=', + 'data_start=2023-01-01%2000:00:00', + 'data_end=2023-02-01%2000:00:00', + 'tz=US%2FPacific', + 'format_datetime=%25Y-%25m-%25d+%25H%3A%25i%3A%25S', + 'mime=txt', + 'delimiter=comma' + ]) + self.assertEqual(url, expected_url) + + def test_get_device_series(self): + result = alert.get_device_series(1108, + 6, + dt.datetime(2023, 11, 27), + dt.datetime(2023, 11, 28), + ascending=True).head(4).values.tolist() + expected_result = [['2023-11-27 00:00:00', '2023-11-27 00:31:05', 50.38, 'ft', 'A'], + ['2023-11-27 00:15:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A'], + ['2023-11-27 00:30:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A'], + ['2023-11-27 00:45:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A']] + self.assertEqual(result, expected_result) + + +if __name__ == '__main__': + unittest.main() diff --git a/collect/tests/test_basics.py b/collect/tests/test_basics.py deleted file mode 100644 index cde976d..0000000 --- a/collect/tests/test_basics.py +++ /dev/null @@ -1,798 +0,0 @@ -""" -collect.tests.test_basics -============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection -""" -# -*- coding: utf-8 -*- -import datetime as dt -import io -import os -import textwrap -import unittest -import unittest.mock - -from dotenv import load_dotenv -import pandas as pd - -from collect.dwr import cdec -from collect.dwr import casgem -from collect.dwr import cawdl -from collect.dwr import b120 -from collect.dwr import swp - -from collect import alert -from collect import cnrfc -from collect import cvo -from collect import nid -from collect import usgs -from collect 
import utils -from collect.usace import wcds - - -class TestSacAlert(unittest.TestCase): - - def test_get_site_notes(self): - """ - test the function for retrieving site metadata produces the expected entries - """ - result = alert.get_site_notes('1137') - self.assertEqual(result['site_id'], '1137') - self.assertEqual(result['Facility ID:'], 'A31') - self.assertEqual(result['Location:'], 'Upstream of Alpine Frost Dr. west of Bruceville Rd.') - self.assertEqual(result['Date Installed:'], '2/6/1994') - - def test_get_data(self): - result = alert.get_data('1137', - dt.datetime(2021, 3, 18, 14), - dt.datetime(2021, 3, 18, 20), - device_ids=[4], - ascending=True, - as_dataframe=True) - - # check the queried sensor values for the specified date range - self.assertEqual(result['data']['Value'].tolist(), - [0.0, 0.04, 0.0, 0.04, 0.04, 0.0, 0.0, 0.04, 0.0, 0.04, 0.04, 0.04, 0.0, 0.04, 0.0]) - - # check the associated date/time stamps - self.assertEqual(result['data']['Receive'].tolist()[:4], - ['2021-03-18 14:00:25', '2021-03-18 14:36:20', '2021-03-18 15:00:30', '2021-03-18 15:24:21']) - - def test_get_site_sensors(self): - """ - test the function for retrieving site metadata sensors list produces the expected number of entries - """ - self.assertEqual(len(alert.get_site_sensors(1122)['sensors']), 7) - - def test_get_sites(self): - """ - test the function for retrieving site list for a particular gage types returns the expected number of entries - """ - self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (81, 12)) - self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (37, 10)) - - def test_get_sites_from_list(self): - self.assertEqual(alert.get_sites_from_list(as_dataframe=True, sensor_class=None).shape, (128, 4)) - - def test_ustrip(self): - self.assertEqual(alert.alert._ustrip('\u00A0'), '') - - def test_get_site_location(self): - result = alert.get_site_location(1122) - self.assertEqual(result['latitude'], 38.6024722) - self.assertEqual(result['longitude'], -121.3951389) - - def test_get_query_url(self): - url = alert.get_query_url(1137, 3, dt.datetime(2023, 1, 1), dt.datetime(2023, 2, 1)) - expected_url = '&'.join([ - 'https://www.sacflood.org/export/file/?site_id=1137', - 'device_id=3', - 'mode=', - 'hours=', - 'data_start=2023-01-01%2000:00:00', - 'data_end=2023-02-01%2000:00:00', - 'tz=US%2FPacific', - 'format_datetime=%25Y-%25m-%25d+%25H%3A%25i%3A%25S', - 'mime=txt', - 'delimiter=comma' - ]) - self.assertEqual(url, expected_url) - - def test_get_device_series(self): - result = alert.get_device_series(1108, - 6, - dt.datetime(2023, 11, 27), - dt.datetime(2023, 11, 28), - ascending=True).head(4).values.tolist() - expected_result = [['2023-11-27 00:00:00', '2023-11-27 00:31:05', 50.38, 'ft', 'A'], - ['2023-11-27 00:15:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A'], - ['2023-11-27 00:30:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A'], - ['2023-11-27 00:45:00', '2023-11-27 01:31:05', 50.38, 'ft', 'A']] - self.assertEqual(result, expected_result) - - -class TestCNRFC(unittest.TestCase): - - @property - def deterministic_frame(self): - """ - fixture for testing watershed deterministic file handling - """ - if not hasattr(self, '_deterministic_frame'): - text_data = io.StringIO(textwrap.dedent("""\ - GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 - ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE - 2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 - 2019-03-30 
13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 - 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 - 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 - 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 - 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 - 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 - 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 - 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 - 2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 - 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 - 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 - 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 - 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 - 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 - 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 - 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 - 2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 - 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 - 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 - 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 - 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 - 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 - 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 - 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 - 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 - 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 - 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 - 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 - 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 - 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 - 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 - 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 - 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 - 2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 - 2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 - 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 - 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 - 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 - 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 - 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 - 2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 - 2019-04-01 
06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 - 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 - 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 - 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 - 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 - 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 - 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 - 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 - 2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 - 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 - 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 - 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 - 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 - 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 - 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 - 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) - self._deterministic_frame = pd.read_csv(text_data, - header=0, - skiprows=[1,], - nrows=60, - parse_dates=True, - index_col=0, - float_precision='high', - dtype={'GMT': str}).mul(1000) - return self._deterministic_frame - - def test_cnrfc_credentials(self): - """ - load sensitive info from .env file and test CNRFC credentials exist - """ - load_dotenv() - self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) - - def test_convert_date_columns(self): - """Ensure datetime data converted to string format""" - test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') - self.assertEqual(test_index.tolist()[0], '2019-03-30') - - def test_validate_duration(self): - """ - function to properly format/case hourly or daily durations - """ - duration = 'Hourly' - self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - - def test_validate_duration_invalid(self): - """ - test that invalid duration raises a ValueError - """ - bad_input = 'monthly' - self.assertRaises(ValueError, - cnrfc.cnrfc._validate_duration, - bad_input) - - def test_get_deterministic_forecast(self): - """ - Test that deterministic forecast start from Graphical_RVF page matches - CSV start of forecast - """ - cnrfc_id = 'FOLC1' - first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] - df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] - first_forecast_entry = df['forecast'].dropna().index.tolist()[0] - - # check that the date/time representation in the timestamp and datetime.datetime objects are the same - self.assertEqual(first_forecast_entry.year, first_ordinate.year) - self.assertEqual(first_forecast_entry.month, first_ordinate.month) - self.assertEqual(first_forecast_entry.day, first_ordinate.day) - self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) - self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) - - # for now, strip the local tzinfo from `first_ordinate` - self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) - - def test_get_deterministic_forecast_watershed(self): - """ - test watershed deterministic 
forecast download for North San Joaquin on a particular date - """ - df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] - self.assertEqual(df.head(20)['NHGC1'].values.tolist(), - self.deterministic_frame.head(20)['NHGC1'].values.tolist()) - - def test_get_water_year_trend_tabular(self): - """ - test watershed deterministic forecast download for North San Joaquin on a particular date - """ - df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] - self.assertEqual(df.shape, (365, 9)) - - # def test_get_seasonal_trend_tabular(self): - # cnrfc.get_seasonal_trend_tabular(cnrfc_id, water_year) - - # def test_get_water_year_trend_tabular(self): - # cnrfc.get_water_year_trend_tabular(cnrfc_id, water_year) - - # def test_get_deterministic_forecast(self): - # cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=False) - - # def test_get_deterministic_forecast_watershed(self): - # cnrfc.get_deterministic_forecast_watershed(watershed, - # date_string, - # acre_feet=False, - # pdt_convert=False, - # as_pdt=False, - # cnrfc_id=None) - - # def test_get_forecast_meta_deterministic(self): - # cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=False) - - # def test_get_ensemble_forecast(self): - # cnrfc.get_ensemble_forecast(cnrfc_id, duration, acre_feet=False, pdt_convert=False, as_pdt=False) - - # def test_get_ensemble_forecast_watershed(self): - # cnrfc.get_ensemble_forecast_watershed(watershed, - # duration, - # date_string, - # acre_feet=False, - # pdt_convert=False, - # as_pdt=False, - # cnrfc_id=None) - - # def test_download_watershed_file(self): - # cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) - - # def test_get_watershed_forecast_issue_time(self): - # cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) - - # def test_get_watershed(self): - # cnrfc.get_watershed(cnrfc_id) - - # def test_get_ensemble_first_forecast_ordinate(self): - # cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) - - # def test_get_ensemble_product_url(self): - # cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') - - # def test_get_ensemble_product_1(self): - # cnrfc.get_ensemble_product_1(cnrfc_id) - - # def test_get_ensemble_product_2(self): - # cnrfc.get_ensemble_product_2(cnrfc_id) - - # def test_get_ensemble_product_3(self): - # cnrfc.get_ensemble_product_3(cnrfc_id) - - # def test_get_ensemble_product_5(self): - # cnrfc.get_ensemble_product_5(cnrfc_id) - - # def test_get_ensemble_product_6(self): - # cnrfc.get_ensemble_product_6(cnrfc_id) - - # def test_get_ensemble_product_10(self): - # cnrfc.get_ensemble_product_10(cnrfc_id) - - # def test_get_ensemble_product_11(self): - # cnrfc.get_ensemble_product_11(cnrfc_id) - - # def test_get_ensemble_product_12(self): - # cnrfc.get_ensemble_product_12(cnrfc_id) - - # def test_get_ensemble_product_13(self): - # cnrfc.get_ensemble_product_13(cnrfc_id) - - # def test_get_data_report_part_8(self): - # cnrfc.get_data_report_part_8() - - # def test_get_monthly_reservoir_storage_summary(self): - # cnrfc.get_monthly_reservoir_storage_summary() - - # def test_esp_trace_analysis_wrapper(self): - # cnrfc.esp_trace_analysis_wrapper() - - # def test__apply_conversions(self): - # cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) - - # def test__get_cnrfc_restricted_content(self): - # cnrfc._get_cnrfc_restricted_content(url) - - # def 
test__get_forecast_csv(self): - # cnrfc._get_forecast_csv(url) - - # def test_get_forecast_csvdata(self): - # cnrfc.get_forecast_csvdata(url) - - # def test_get_rating_curve(self): - # cnrfc.get_rating_curve(cnrfc_id) - - # def test__default_date_string(self): - # cnrfc._default_date_string(date_string) - - # def test__parse_blue_table(self): - # cnrfc._parse_blue_table(table_soup) - - -# class TestCASGEM(unittest.TestCase): - - # def test_get_casgem_data(self): - # result = casgem.get_casgem_data(casgem_id=None, - # state_well_number=None, - # local_well_designation=None, - # master_site_code=None, - # write_to_html_file=False) - # print(result) - - -# class TestCAWDL(unittest.TestCase): - - # def test_get_cawdl_data(self): - # cawdl.get_cawdl_data(site_id) - - # def test_get_cawdl_surface_water_data(self): - # cawdl.get_cawdl_surface_water_data(site_id, water_year, variable, interval=None) - - # def test_get_cawdl_surface_water_por(self): - # cawdl.get_cawdl_surface_water_por(site_id, variable, interval=None) - - # def test_get_cawdl_surface_water_site_report(self): - # cawdl.get_cawdl_surface_water_site_report(site_id) - - -# class TestCDEC(unittest.TestCase): - - # def test_get_b120_data(self): - # b120.get_b120_data(date_suffix='') - - # def test_validate_date_suffix(self): - # b120.validate_date_suffix(date_suffix, min_year=2017) - - # def test_clean_td(self): - # b120.clean_td(text) - - # def test_get_b120_update_data(self): - # b120.get_b120_update_data(date_suffix='') - - # def test_get_120_archived_reports(self): - # b120.get_120_archived_reports(year, month) - - # def test_april_july_dataframe(self): - # b120.april_july_dataframe(data_list) - - # def test_get_station_url(self): - # cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') - - # def test_get_station_sensors(self): - # cdec.get_station_sensors(station, start, end) - - # def test_get_station_data(self): - # cdec.get_station_data(station, start, end, sensors=[], duration='') - - # def test_get_raw_station_csv(self): - # cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') - - # def test_get_raw_station_json(self): - # cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') - - # def test_get_sensor_frame(self): - # cdec.get_sensor_frame(station, start, end, sensor='', duration='') - - # def test_get_station_metadata(self): - # cdec.get_station_metadata(station, as_geojson=False) - - # def test_get_dam_metadata(self): - # cdec.get_dam_metadata(station) - - # def test_get_reservoir_metadata(self): - # cdec.get_reservoir_metadata(station) - - # def test__get_table_index(self): - # cdec._get_table_index(table_type, tables) - - # def test__parse_station_generic_table(self): - # cdec._parse_station_generic_table(table) - - # def test__parse_station_sensors_table(self): - # cdec._parse_station_sensors_table(table) - - # def test__parse_station_comments_table(self): - # cdec._parse_station_comments_table(table) - - # def test__parse_data_available(self): - # cdec._parse_data_available(text) - - # def test_get_data(self): - # cdec.get_data(station, start, end, sensor='', duration='') - - # def test_get_daily_snowpack_data(self): - # cdec.get_daily_snowpack_data(region, start, end) - - -# class TestCVO(unittest.TestCase): - - # def test(self): - # pass - - # # prn test - # result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2011, 3, 31), 'doutdly') - - # # pdf test - # result = cvo.get_data(dt.date(2013, 12, 1), dt.date(2014, 1, 31), 
'doutdly') - # result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2023, 5, 1), 'shafln') - # result = cvo.get_data(dt.date(2012, 6, 1), dt.date(2013, 12, 31), 'slunit') - # result = cvo.get_data(dt.date(2020, 6, 1), dt.date(2021, 1, 1), 'fedslu') - # result = cvo.get_data(dt.date(2021, 1, 10), dt.date.now(), 'shadop') - # result = cvo.get_data(dt.date(2023, 5, 1), dt.date.now(), 'kesdop') - - # def test_get_area(self): - # cvo.get_area(date_structure, report_type) - - # def test_get_data(self): - # cvo.get_data(start, end, report_type) - - # def test_get_date_published(self): - # cvo.get_date_published(url, date_structure, report_type) - - # def test_get_report_columns(self): - # cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False) - - # def test_get_report(self): - # cvo.get_report(date_structure, report_type) - - # def test_get_title(self): - # cvo.get_title(report_type) - - # def test_get_url(self): - # cvo.get_url(date_structure, report_type) - - # def test_months_between(self): - # cvo.months_between(start_date, end_date) - - # def test_doutdly_data_cleaner(self): - # cvo.doutdly_data_cleaner(content, report_type, date_structure) - - # def test_load_pdf_to_dataframe(self): - # cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False) - - # def test_download_files(self): - # cvo.download_files(start, end, report_type, destination='.') - - -class TestNID(unittest.TestCase): - - @property - def sample_daily_data(self): - if not hasattr(self, '_sample_daily_data'): - self._sample_daily_data = io.StringIO(textwrap.dedent("""\ - Nevada Irrigation District USDAY V123 Output 11/22/2023 - - Summary Report - - Site: DC900 Scott's Flat Reservoir - USGS #: - Beginning Date: 01/01/2023 - Ending Date: 12/31/2023 - - Daily 2400 Storage Volume in Acre-Feet Water Year Jan 2023 to Dec 2023 - - 12/31/2022 44500 - - Day JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC - ------------------------------------------------------------------------------------------------------------------------------------ - 1 45300 48500 48500 48500 48500 47800 47700 45100 42400 40100 - 2 45800 48500 48500 48500 48500 47800 47600 45000 42300 40100 - 3 46200 48500 48500 48500 48500 47900 47500 44900 42300 40000 - 4 46400 48500 48500 48500 48500 48000 47500 44800 42200 40000 - 5 46900 48500 48500 48500 48500 48000 47400 44700 42100 39900 - - 6 47300 48500 48500 48500 48500 48000 47400 44600 42000 39900 - 7 47500 48500 48500 48500 48500 48100 47300 44500 42000 39800 - 8 47800 48500 48500 48500 48500 48000 47200 44400 41900 39800 - 9 48500 48500 48500 48500 48500 48000 47200 44400 41800 39800 - 10 48500 48500 48500 48500 48500 48000 47100 44300 41700 39700 - - 11 48500 48500 48500 48500 48500 48000 47000 44200 41600 39700 - 12 48500 48500 48500 48500 48500 48000 47000 44100 41500 39700 - 13 48500 48500 48500 48500 48400 48100 46900 44000 41400 39600 - 14 48500 48500 48500 48500 48400 48100 46800 43900 41400 39600 - 15 48500 48500 48500 48500 48300 48100 46700 43800 41300 39700 - - 16 48500 48500 48500 48500 48300 48100 46600 43700 41200 39800 - 17 48500 48500 48500 48500 48200 48100 46500 43600 41100 39800 - 18 48500 48500 48500 48500 48100 48000 46400 43500 41000 39900 - 19 48500 48500 48500 48500 48100 48000 46400 43400 40900 40000 - 20 48500 48400 48500 48500 48000 48000 46300 43400 40900 40100 - - 21 48500 48400 48500 48500 47900 48000 46200 43300 40800 40100 - 22 48500 48400 48500 48500 47800 48000 46100 43200 40700 40200 - 23 48500 48300 48500 48500 47800 47900 46000 
43200 40600 - 24 48500 48400 48500 48500 47700 47900 45900 43100 40600 - 25 48500 48300 48500 48500 47600 47900 45800 43000 40500 - - 26 48500 48400 48500 48500 47600 47900 45700 42900 40400 - 27 48500 48500 48500 48500 47600 47800 45600 42800 40400 - 28 48500 48500 48500 48500 47600 47800 45500 42700 40300 - 29 48500 ------ 48500 48500 47700 47700 45400 42600 40300 - 30 48500 ------ 48500 48500 47700 47700 45300 42500 40200 - 31 48500 ------ 48500 ------ 47700 ------ 45200 42400 ------ ------ - - Max 48500 48500 48500 48500 48500 48100 47700 45100 42400 40200 - Min 45300 48300 48500 48500 47600 47700 45200 42400 40200 39600 - Change 4000 0 0 0 -800 0 -2500 -2800 -2200 - - Cal Year 2023 Mean 46300 Max 48500 Min 39600 Inst Max 48500 - - ------------------ Notes ------------------- - All recorded data is continuous and reliable - """)) - return self._sample_daily_data - - def test_get_sites(self): - result = nid.get_sites() - expected_dict = {'BR100': 'Auburn Ravine I at Head', - 'BR220': 'Hemphill Canal at Head', - 'BR301': 'Combie Phase I at Head', - 'BR334': 'Camp Far West at Head', - 'BR368': 'Gold Hill I at Head', - 'BR900': 'Combie Reservoir-Spill-1600.', - 'BSCA': 'Bowman-Spaulding Canal Intake Near Graniteville, Ca', - 'BWMN': 'Bowman Lake Near Graniteville, Ca', - 'CPFL': 'Chicago Park Flume Near Dutch Flat, Ca', - 'DC102': 'Cascade at Head', - 'DC131': 'Newtown Canal at Head', - 'DC140': 'Tunnel Canal at Head', - 'DC145': 'D. S. Canal at Head', - 'DC169': 'Tarr Canal at Head', - 'DC900': "Scott's Flat Reservoir", - 'DFFL': 'Dutch Flat #2 Flume Near Blue Canyon, Ca', - 'FAUC': 'Faucherie Lake Near Cisco, Ca', - 'FRLK': 'French Lake Near Cisco Grove, Ca', - 'JKSN': 'Jackson Lake near Sierra City', - 'JMDW': 'Jackson Meadows Reservoir Near Sierra City, Ca', - 'MBTO': 'Milton-Bowman Tunnel Outlet (South Portal)', - 'ROLK': 'Rollins Reservoir Near Colfax, Ca', - 'SWML': 'Sawmill Lake Near Graniteville, Ca', - 'WLSN': 'Wilson Creek near Sierra City'} - self.assertEqual(result, expected_dict) - - def test_get_issue_date(self): - result = nid.get_issue_date() - self.assertTrue(isinstance(result, dt.datetime)) - self.assertLess(result, dt.datetime.now()) - - def test_get_site_files(self): - site = 'DC140' - result = nid.get_site_files('DC140') - self.assertEqual(sorted(result), [f'{site}.adesc.pdf', - f'{site}.csv_flow.csv', - f'{site}.plot_flow.png', - f'{site}.usday_daily_flow.txt']) - - def test_get_site_metric(self): - self.assertEqual(nid.get_site_metric('BR334', interval='daily'), 'flow') - - def test_get_station_url(self): - self.assertEqual(nid.get_station_url('ROLK', metric='index', interval=None), - 'https://river-lake.nidwater.com/hyquick/ROLK/index.htm') - - self.assertEqual(nid.get_station_url('ROLK', metric='flow', interval='daily'), - 'https://river-lake.nidwater.com/hyquick/ROLK/ROLK.usday_daily_flow.txt') - - self.assertEqual(nid.get_station_url('ROLK', metric='flow', interval='hourly'), - 'https://river-lake.nidwater.com/hyquick/ROLK/ROLK.csv_flow.csv') - - def test_get_daily_data(self): - result = nid.get_daily_data('DC900', json_compatible=False) - year = result['info']['year'] - self.assertEqual(result['data'].head(4).index.strftime('%Y-%m-%d').tolist(), - [f'{year}-01-01', f'{year}-01-02', f'{year}-01-03', f'{year}-01-04']) - - def test_get_daily_meta(self): - url = 'https://river-lake.nidwater.com/hyquick/DC140/DC140.usday_daily_flow.txt' - result = nid.get_daily_meta(url=url, content=None) - self.assertEqual(result['Site'], 'DC140 Tunnel Canal at Head') - 
self.assertEqual(result['USGS #'], 'NO') - self.assertEqual(result['version'], 'USDAY V123') - - def test_get_hourly_data(self): - result = nid.get_hourly_data('WLSN', json_compatible=False) - sample = result['data'].head() - self.assertEqual(sample.index.strftime('%Y-%m-%d %H:%M:%S').tolist(), - ['2022-01-01 01:00:00', - '2022-01-01 02:00:00', - '2022-01-01 03:00:00', - '2022-01-01 04:00:00', - '2022-01-01 05:00:00']) - self.assertEqual(sample['Amount Diverted (AF)'].tolist(), [0.15, 0.15, 0.15, 0.15, 0.15]) - - def test_parse_qualifiers(self): - series = pd.Series(data=['Qualities:', - '2 - Good quality edited data', - '22 - Raw Satellite Data', - '28 - Radio data', - '255 - No data exists'], - name='Site Information') - self.assertEqual(nid.parse_qualifiers(series), - {'2': 'Good quality edited data', - '22': 'Raw Satellite Data', - '28': 'Radio data', - '255': 'No data exists'}) - - def test_serialize(self): - df = pd.DataFrame(index=pd.date_range('2020-12-01', '2020-12-03', freq='D'), - data={'VALUE': [42] * 3}) - self.assertEqual(nid.serialize(df.copy(), day_format='%Y-%m-%d'), - {'VALUE': {'2020-12-01 00:00': 42, '2020-12-02 00:00': 42, '2020-12-03 00:00': 42}}) - self.assertEqual(nid.serialize(df.copy(), day_format='%Y-%-m-%-d'), - {'VALUE': {'2020-12-1 00:00': 42, '2020-12-2 00:00': 42, '2020-12-3 00:00': 42}}) - - -# class TestSWP(unittest.TestCase): - - # def test_prompt_installation_and_exit(self): - # swp.prompt_installation_and_exit() - - # def test_get_report_catalog(self): - # swp.get_report_catalog() - - # def test_get_report_url(self): - # swp.get_report_url() - - # def test_get_raw_text(self): - # swp.get_raw_text() - - # def test_get_delta_daily_data(self): - # swp.get_delta_daily_data() - - # def test_get_barker_slough_data(self): - # swp.get_barker_slough_data() - - # def test_get_oco_tabular_data(self): - # swp.get_oco_tabular_data() - - -class TestUSACE(unittest.TestCase): - - def test_get_water_year_data(self): - result = wcds.get_water_year_data('buc', 2021, interval='d') - self.assertEqual(result['data'].shape, (397, 16)) - - sample = result['data'].head(4) - self.assertEqual(result['data'].head(4)['Top of Conservation (ac-ft)'].tolist(), - [149521.45, 149042.90, 148564.35, 148085.80]) - - # does not include timezone handling - self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].head(4).index.tolist())), - ['2020-08-31 00:00:00', - '2020-09-01 00:00:00', - '2020-09-02 00:00:00', - '2020-09-03 00:00:00']) - - # does not include timezone handling - self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].tail(4).index.tolist())), - ['2021-09-28 00:00:00', - '2021-09-29 00:00:00', - '2021-09-30 00:00:00', - '2021-10-01 00:00:00']) - - def test_get_data(self): - result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') - self.assertEqual(result['data'].shape, (398, 16)) - self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0]) - - def test_get_wcds_reservoirs(self): - """ - show that 35 reservoirs exist in the internal collect record for WCDS reservoirs - """ - self.assertEqual(wcds.get_wcds_reservoirs().shape[0], 35) - - def test_get_wcds_data(self): - result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') - self.assertEqual(result['data'].shape, (398, 16)) - self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0]) - - def 
test_get_release_report(self): - self.assertEqual(wcds.get_release_report('buc')['info']['units'], 'cfs') - self.assertGreater(wcds.get_release_report('buc')['data'].shape[0], 0) - - def test_get_reservoir_metadata(self): - result = wcds.get_reservoir_metadata('nhg', 2022, interval='d') - self.assertEqual(int(result['gross pool (stor)']), 317100) - self.assertEqual(int(result['gross pool (elev)']), 713) - self.assertTrue('Precip @ Dam (in; elev 712 ft)' in result['data headers']) - - -class TestUSGS(unittest.TestCase): - - def test_get_query_url(self): - url = usgs.get_query_url(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), 'instantaneous') - expected_url = '&'.join(['https://waterservices.usgs.gov/nwis/iv/?format=json', - 'sites=11418500', - 'startDT=2023-01-01T00:00:00', - 'endDT=2023-01-05T00:00:00', - 'parameterCd=00060', - 'siteStatus=all']) - self.assertEqual(url, expected_url) - - def test_get_data(self): - result = usgs.get_data(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), interval='daily') - self.assertEqual(result['data']['00060'].tolist(), [1280.0, 341.0, 351.0, 260.0, 1790.0]) - self.assertEqual(result['data'].index.strftime('%Y-%m-%d').tolist(), - ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']) - - def test_get_usgs_data(self): - result = usgs.get_usgs_data(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), interval='daily') - self.assertEqual(result['data']['00060'].tolist(), [1280.0, 341.0, 351.0, 260.0, 1790.0]) - self.assertEqual(result['data'].index.strftime('%Y-%m-%d').tolist(), - ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']) - - def test_get_peak_streamflow(self): - result = usgs.get_peak_streamflow(11418500)['data'][['peak_va']] - self.assertEqual(result.head()['peak_va'].tolist(), - ['14000', '6260', '7520', '10800', '2400']) - self.assertEqual(result.head().index.strftime('%Y-%m-%d').tolist(), - ['1928-02-29', '1936-02-21', '1937-02-04', '1937-12-11', '1939-03-08']) - - -class TestUtils(unittest.TestCase): - - # def test_get_session_response(self): - # utils.get_session_response(url) - - # def test_get_web_status(self): - # utils.get_web_status(url) - - # def test_clean_fixed_width_headers(self): - # utils.clean_fixed_width_headers(columns) - - def test_get_water_year(self): - self.assertEqual(utils.get_water_year(dt.datetime(2023, 5, 12)), 2023) - self.assertEqual(utils.get_water_year(dt.datetime(2023, 11, 12)), 2024) - - -if __name__ == '__main__': - unittest.main() diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py new file mode 100644 index 0000000..1ebf08d --- /dev/null +++ b/collect/tests/test_cnrfc.py @@ -0,0 +1,268 @@ +""" +collect.tests.test_basics +============================================================ +initial test suite for collect data access and utility functions; note: these tests require internet connection +""" +# -*- coding: utf-8 -*- +import datetime as dt +import io +import os +import textwrap +import unittest +import unittest.mock + +from dotenv import load_dotenv +import pandas as pd + +from collect import cnrfc + + +class TestCNRFC(unittest.TestCase): + + @property + def deterministic_frame(self): + """ + fixture for testing watershed deterministic file handling + """ + if not hasattr(self, '_deterministic_frame'): + text_data = io.StringIO(textwrap.dedent("""\ + GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 + ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE + 2019-03-30 
12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 + 2019-03-30 13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 + 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 + 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 + 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 + 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 + 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 + 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 + 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 + 2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 + 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 + 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 + 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 + 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 + 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 + 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 + 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 + 2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 + 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 + 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 + 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 + 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 + 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 + 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 + 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 + 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 + 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 + 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 + 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 + 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 + 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 + 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 + 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 + 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 + 2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 + 2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 + 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 + 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 + 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 + 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 + 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 + 2019-04-01 
05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 + 2019-04-01 06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 + 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 + 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 + 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 + 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 + 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 + 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 + 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 + 2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 + 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 + 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 + 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 + 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 + 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 + 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 + 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) + self._deterministic_frame = pd.read_csv(text_data, + header=0, + skiprows=[1,], + nrows=60, + parse_dates=True, + index_col=0, + float_precision='high', + dtype={'GMT': str}).mul(1000) + return self._deterministic_frame + + def test_cnrfc_credentials(self): + """ + load sensitive info from .env file and test CNRFC credentials exist + """ + load_dotenv() + self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) + + def test_convert_date_columns(self): + """Ensure datetime data converted to string format""" + test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') + self.assertEqual(test_index.tolist()[0], '2019-03-30') + + def test_validate_duration(self): + """ + function to properly format/case hourly or daily durations + """ + duration = 'Hourly' + self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') + + def test_validate_duration_invalid(self): + """ + test that invalid duration raises a ValueError + """ + bad_input = 'monthly' + self.assertRaises(ValueError, + cnrfc.cnrfc._validate_duration, + bad_input) + + def test_get_deterministic_forecast(self): + """ + Test that deterministic forecast start from Graphical_RVF page matches + CSV start of forecast + """ + cnrfc_id = 'FOLC1' + first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] + df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] + first_forecast_entry = df['forecast'].dropna().index.tolist()[0] + + # check that the date/time representation in the timestamp and datetime.datetime objects are the same + self.assertEqual(first_forecast_entry.year, first_ordinate.year) + self.assertEqual(first_forecast_entry.month, first_ordinate.month) + self.assertEqual(first_forecast_entry.day, first_ordinate.day) + self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) + self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) + + # for now, strip the local tzinfo from `first_ordinate` + self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) + + def 
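test_get_ensemble_forecast_smoke(self):
+        """
+        illustrative smoke check added for coverage of the hourly ensemble getter;
+        this sketch assumes get_ensemble_forecast follows the same {'data': DataFrame}
+        return pattern as the deterministic getter, with one column per trace, so it
+        asserts only a lower bound on the trace count
+        """
+        df = cnrfc.get_ensemble_forecast('FOLC1', 'hourly')['data']
+        self.assertGreater(df.shape[1], 1)
+
+    def 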
test_get_deterministic_forecast_watershed(self): + """ + test watershed deterministic forecast download for North San Joaquin on a particular date + """ + df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] + self.assertEqual(df.head(20)['NHGC1'].values.tolist(), + self.deterministic_frame.head(20)['NHGC1'].values.tolist()) + + def test_get_water_year_trend_tabular(self): + """ + test watershed deterministic forecast download for North San Joaquin on a particular date + """ + df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] + self.assertEqual(df.shape, (365, 9)) + + # def test_get_seasonal_trend_tabular(self): + # cnrfc.get_seasonal_trend_tabular(cnrfc_id, water_year) + + # def test_get_water_year_trend_tabular(self): + # cnrfc.get_water_year_trend_tabular(cnrfc_id, water_year) + + # def test_get_deterministic_forecast(self): + # cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=False) + + # def test_get_deterministic_forecast_watershed(self): + # cnrfc.get_deterministic_forecast_watershed(watershed, + # date_string, + # acre_feet=False, + # pdt_convert=False, + # as_pdt=False, + # cnrfc_id=None) + + # def test_get_forecast_meta_deterministic(self): + # cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=False) + + # def test_get_ensemble_forecast(self): + # cnrfc.get_ensemble_forecast(cnrfc_id, duration, acre_feet=False, pdt_convert=False, as_pdt=False) + + # def test_get_ensemble_forecast_watershed(self): + # cnrfc.get_ensemble_forecast_watershed(watershed, + # duration, + # date_string, + # acre_feet=False, + # pdt_convert=False, + # as_pdt=False, + # cnrfc_id=None) + + # def test_download_watershed_file(self): + # cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + + # def test_get_watershed_forecast_issue_time(self): + # cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) + + # def test_get_watershed(self): + # cnrfc.get_watershed(cnrfc_id) + + # def test_get_ensemble_first_forecast_ordinate(self): + # cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + + # def test_get_ensemble_product_url(self): + # cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') + + # def test_get_ensemble_product_1(self): + # cnrfc.get_ensemble_product_1(cnrfc_id) + + # def test_get_ensemble_product_2(self): + # cnrfc.get_ensemble_product_2(cnrfc_id) + + # def test_get_ensemble_product_3(self): + # cnrfc.get_ensemble_product_3(cnrfc_id) + + # def test_get_ensemble_product_5(self): + # cnrfc.get_ensemble_product_5(cnrfc_id) + + # def test_get_ensemble_product_6(self): + # cnrfc.get_ensemble_product_6(cnrfc_id) + + # def test_get_ensemble_product_10(self): + # cnrfc.get_ensemble_product_10(cnrfc_id) + + # def test_get_ensemble_product_11(self): + # cnrfc.get_ensemble_product_11(cnrfc_id) + + # def test_get_ensemble_product_12(self): + # cnrfc.get_ensemble_product_12(cnrfc_id) + + # def test_get_ensemble_product_13(self): + # cnrfc.get_ensemble_product_13(cnrfc_id) + + # def test_get_data_report_part_8(self): + # cnrfc.get_data_report_part_8() + + # def test_get_monthly_reservoir_storage_summary(self): + # cnrfc.get_monthly_reservoir_storage_summary() + + # def test_esp_trace_analysis_wrapper(self): + # cnrfc.esp_trace_analysis_wrapper() + + # def test__apply_conversions(self): + # cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) + + # def 
test__get_cnrfc_restricted_content(self):
+    #     cnrfc._get_cnrfc_restricted_content(url)
+
+    # def test__get_forecast_csv(self):
+    #     cnrfc._get_forecast_csv(url)
+
+    # def test_get_forecast_csvdata(self):
+    #     cnrfc.get_forecast_csvdata(url)
+
+    # def test_get_rating_curve(self):
+    #     cnrfc.get_rating_curve(cnrfc_id)
+
+    # def test__default_date_string(self):
+    #     cnrfc._default_date_string(date_string)
+
+    # def test__parse_blue_table(self):
+    #     cnrfc._parse_blue_table(table_soup)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collect/tests/test_cvo.py b/collect/tests/test_cvo.py
new file mode 100644
index 0000000..900d495
--- /dev/null
+++ b/collect/tests/test_cvo.py
@@ -0,0 +1,71 @@
+"""
+collect.tests.test_cvo
+============================================================
+initial test suite for collect data access and utility functions; note: these tests require internet connection
+"""
+# -*- coding: utf-8 -*-
+import datetime as dt
+import unittest
+
+from collect import cvo
+
+
+class TestCVO(unittest.TestCase):
+
+    def test_get_data_formats(self):
+        """
+        smoke tests spanning both the prn- and pdf-formatted report archives
+        """
+        # prn test
+        result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2011, 3, 31), 'doutdly')
+
+        # pdf test
+        result = cvo.get_data(dt.date(2013, 12, 1), dt.date(2014, 1, 31), 'doutdly')
+        result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2023, 5, 1), 'shafln')
+        result = cvo.get_data(dt.date(2012, 6, 1), dt.date(2013, 12, 31), 'slunit')
+        result = cvo.get_data(dt.date(2020, 6, 1), dt.date(2021, 1, 1), 'fedslu')
+        result = cvo.get_data(dt.date(2021, 1, 10), dt.date.today(), 'shadop')
+        result = cvo.get_data(dt.date(2023, 5, 1), dt.date.today(), 'kesdop')
+
+    # def test_get_area(self):
+    #     cvo.get_area(date_structure, report_type)
+
+    # def test_get_data(self):
+    #     cvo.get_data(start, end, report_type)
+
+    # def test_get_date_published(self):
+    #     cvo.get_date_published(url, date_structure, report_type)
+
+    # def test_get_report_columns(self):
+    #     cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False)
+
+    # def test_get_report(self):
+    #     cvo.get_report(date_structure, report_type)
+
+    # def test_get_title(self):
+    #     cvo.get_title(report_type)
+
+    # def test_get_url(self):
+    #     cvo.get_url(date_structure, report_type)
+
+    # def test_months_between(self):
+    #     cvo.months_between(start_date, end_date)
+
+    # def test_doutdly_data_cleaner(self):
+    #     cvo.doutdly_data_cleaner(content, report_type, date_structure)
+
+    # def test_load_pdf_to_dataframe(self):
+    #     cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False)
+
+    # def test_download_files(self):
+    #     cvo.download_files(start, end, report_type, destination='.')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py
new file mode 100644
index 0000000..d516ff9
--- /dev/null
+++ b/collect/tests/test_dwr.py
@@ -0,0 +1,184 @@
+"""
+collect.tests.test_dwr
+============================================================
+initial test suite for collect data access and utility functions; note: these tests require internet connection
+"""
+# -*- coding: utf-8 -*-
+import datetime as dt
+import io
+import os
+import textwrap
+import unittest
+import unittest.mock
+
+from dotenv import load_dotenv
+import pandas as pd
+
+from collect.dwr import cdec
+from collect.dwr import casgem
+from collect.dwr import cawdl
+from collect.dwr import b120
+from collect.dwr import swp
+
+from collect import alert
+from collect import cnrfc
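+# note: the collect package-level imports in this block carry over from test_basics.py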
+from collect import cvo +from collect import nid +from collect import usgs +from collect import utils +from collect.usace import wcds + + +class TestCASGEM(unittest.TestCase): + """ + dwr.casgem module references inactive API; CASGEM tools must be updated once CNRA completes web transition + """ + + def test_get_casgem_data(self): + return + + casgem_id_result = casgem.get_casgem_data( + casgem_id='34318', + state_well_number=None, + local_well_designation=None, + master_site_code=None, + write_to_html_file=False + ) + + state_well_number_result = casgem.get_casgem_data( + casgem_id=None, + state_well_number='19N02W36H001M', + local_well_designation=None, + master_site_code=None, + write_to_html_file=False + ) + + local_well_designation_result = casgem.get_casgem_data( + casgem_id=None, + state_well_number=None, + local_well_designation='19N02W36H001M', + master_site_code=None, + write_to_html_file=False + ) + + master_site_code_result = casgem.get_casgem_data( + casgem_id=None, + state_well_number=None, + local_well_designation=None, + master_site_code='394564N1220246W001', + write_to_html_file=False + ) + + +class TestCAWDL(unittest.TestCase): + # """ + # dwr.cawdl module references inactive API; CAWDL tools must be updated once CNRA/DWR completes web transition + # """ + def test_get_cawdl_data(self): + cawdl.get_cawdl_data('17202') + + def test_get_cawdl_surface_water_data(self): + cawdl.get_cawdl_surface_water_data('17202', 2021, 'FLOW', interval='DAILY_MEAN') + + def test_get_cawdl_surface_water_por(self): + cawdl.get_cawdl_surface_water_por('17202', 'FLOW', interval='DAILY_MEAN') + + def test_get_cawdl_surface_water_site_report(self): + cawdl.get_cawdl_surface_water_site_report('17202') + + +class TestCDEC(unittest.TestCase): + + def test_get_b120_data(self): + b120.get_b120_data(date_suffix='') + + def test_validate_date_suffix(self): + b120.validate_date_suffix(date_suffix, min_year=2017) + + def test_clean_td(self): + b120.clean_td(text) + + def test_get_b120_update_data(self): + b120.get_b120_update_data(date_suffix='') + + def test_get_120_archived_reports(self): + b120.get_120_archived_reports(year, month) + + def test_april_july_dataframe(self): + b120.april_july_dataframe(data_list) + + def test_get_station_url(self): + cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') + + def test_get_station_sensors(self): + cdec.get_station_sensors(station, start, end) + + def test_get_station_data(self): + cdec.get_station_data(station, start, end, sensors=[], duration='') + + def test_get_raw_station_csv(self): + cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') + + def test_get_raw_station_json(self): + cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') + + def test_get_sensor_frame(self): + cdec.get_sensor_frame(station, start, end, sensor='', duration='') + + def test_get_station_metadata(self): + cdec.get_station_metadata(station, as_geojson=False) + + def test_get_dam_metadata(self): + cdec.get_dam_metadata(station) + + def test_get_reservoir_metadata(self): + cdec.get_reservoir_metadata(station) + + def test__get_table_index(self): + cdec._get_table_index(table_type, tables) + + def test__parse_station_generic_table(self): + cdec._parse_station_generic_table(table) + + def test__parse_station_sensors_table(self): + cdec._parse_station_sensors_table(table) + + def test__parse_station_comments_table(self): + cdec._parse_station_comments_table(table) + + def 
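test_get_station_sensors_example(self):
+        """
+        illustrative example with assumed arguments (not part of the original stub
+        list): FOL is a real CDEC station code, but the date range is arbitrary;
+        asserts only that the sensor listing call returns something
+        """
+        result = cdec.get_station_sensors('FOL', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 3))
+        self.assertIsNotNone(result)
+
+    def 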
test__parse_data_available(self): + cdec._parse_data_available(text) + + def test_get_data(self): + cdec.get_data(station, start, end, sensor='', duration='') + + def test_get_daily_snowpack_data(self): + cdec.get_daily_snowpack_data(region, start, end) + + +class TestSWP(unittest.TestCase): + + def test_prompt_installation_and_exit(self): + swp.prompt_installation_and_exit() + + def test_get_report_catalog(self): + swp.get_report_catalog() + + def test_get_report_url(self): + swp.get_report_url() + + def test_get_raw_text(self): + swp.get_raw_text() + + def test_get_delta_daily_data(self): + swp.get_delta_daily_data() + + def test_get_barker_slough_data(self): + swp.get_barker_slough_data() + + def test_get_oco_tabular_data(self): + swp.get_oco_tabular_data() + + +if __name__ == '__main__': + unittest.main() diff --git a/collect/tests/test_nid.py b/collect/tests/test_nid.py new file mode 100644 index 0000000..6a244cf --- /dev/null +++ b/collect/tests/test_nid.py @@ -0,0 +1,186 @@ +""" +collect.tests.test_basics +============================================================ +initial test suite for collect data access and utility functions; note: these tests require internet connection +""" +# -*- coding: utf-8 -*- +import datetime as dt +import io +import textwrap +import unittest +import unittest.mock +import pandas as pd +from collect import nid + + +class TestNID(unittest.TestCase): + + @property + def sample_daily_data(self): + if not hasattr(self, '_sample_daily_data'): + self._sample_daily_data = io.StringIO(textwrap.dedent("""\ + Nevada Irrigation District USDAY V123 Output 11/22/2023 + + Summary Report + + Site: DC900 Scott's Flat Reservoir + USGS #: + Beginning Date: 01/01/2023 + Ending Date: 12/31/2023 + + Daily 2400 Storage Volume in Acre-Feet Water Year Jan 2023 to Dec 2023 + + 12/31/2022 44500 + + Day JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC + ------------------------------------------------------------------------------------------------------------------------------------ + 1 45300 48500 48500 48500 48500 47800 47700 45100 42400 40100 + 2 45800 48500 48500 48500 48500 47800 47600 45000 42300 40100 + 3 46200 48500 48500 48500 48500 47900 47500 44900 42300 40000 + 4 46400 48500 48500 48500 48500 48000 47500 44800 42200 40000 + 5 46900 48500 48500 48500 48500 48000 47400 44700 42100 39900 + + 6 47300 48500 48500 48500 48500 48000 47400 44600 42000 39900 + 7 47500 48500 48500 48500 48500 48100 47300 44500 42000 39800 + 8 47800 48500 48500 48500 48500 48000 47200 44400 41900 39800 + 9 48500 48500 48500 48500 48500 48000 47200 44400 41800 39800 + 10 48500 48500 48500 48500 48500 48000 47100 44300 41700 39700 + + 11 48500 48500 48500 48500 48500 48000 47000 44200 41600 39700 + 12 48500 48500 48500 48500 48500 48000 47000 44100 41500 39700 + 13 48500 48500 48500 48500 48400 48100 46900 44000 41400 39600 + 14 48500 48500 48500 48500 48400 48100 46800 43900 41400 39600 + 15 48500 48500 48500 48500 48300 48100 46700 43800 41300 39700 + + 16 48500 48500 48500 48500 48300 48100 46600 43700 41200 39800 + 17 48500 48500 48500 48500 48200 48100 46500 43600 41100 39800 + 18 48500 48500 48500 48500 48100 48000 46400 43500 41000 39900 + 19 48500 48500 48500 48500 48100 48000 46400 43400 40900 40000 + 20 48500 48400 48500 48500 48000 48000 46300 43400 40900 40100 + + 21 48500 48400 48500 48500 47900 48000 46200 43300 40800 40100 + 22 48500 48400 48500 48500 47800 48000 46100 43200 40700 40200 + 23 48500 48300 48500 48500 47800 47900 46000 43200 40600 + 24 48500 48400 48500 48500 47700 
47900 45900 43100 40600 + 25 48500 48300 48500 48500 47600 47900 45800 43000 40500 + + 26 48500 48400 48500 48500 47600 47900 45700 42900 40400 + 27 48500 48500 48500 48500 47600 47800 45600 42800 40400 + 28 48500 48500 48500 48500 47600 47800 45500 42700 40300 + 29 48500 ------ 48500 48500 47700 47700 45400 42600 40300 + 30 48500 ------ 48500 48500 47700 47700 45300 42500 40200 + 31 48500 ------ 48500 ------ 47700 ------ 45200 42400 ------ ------ + + Max 48500 48500 48500 48500 48500 48100 47700 45100 42400 40200 + Min 45300 48300 48500 48500 47600 47700 45200 42400 40200 39600 + Change 4000 0 0 0 -800 0 -2500 -2800 -2200 + + Cal Year 2023 Mean 46300 Max 48500 Min 39600 Inst Max 48500 + + ------------------ Notes ------------------- + All recorded data is continuous and reliable + """)) + return self._sample_daily_data + + def test_get_sites(self): + result = nid.get_sites() + expected_dict = {'BR100': 'Auburn Ravine I at Head', + 'BR220': 'Hemphill Canal at Head', + 'BR301': 'Combie Phase I at Head', + 'BR334': 'Camp Far West at Head', + 'BR368': 'Gold Hill I at Head', + 'BR900': 'Combie Reservoir-Spill-1600.', + 'BSCA': 'Bowman-Spaulding Canal Intake Near Graniteville, Ca', + 'BWMN': 'Bowman Lake Near Graniteville, Ca', + 'CPFL': 'Chicago Park Flume Near Dutch Flat, Ca', + 'DC102': 'Cascade at Head', + 'DC131': 'Newtown Canal at Head', + 'DC140': 'Tunnel Canal at Head', + 'DC145': 'D. S. Canal at Head', + 'DC169': 'Tarr Canal at Head', + 'DC900': "Scott's Flat Reservoir", + 'DFFL': 'Dutch Flat #2 Flume Near Blue Canyon, Ca', + 'FAUC': 'Faucherie Lake Near Cisco, Ca', + 'FRLK': 'French Lake Near Cisco Grove, Ca', + 'JKSN': 'Jackson Lake near Sierra City', + 'JMDW': 'Jackson Meadows Reservoir Near Sierra City, Ca', + 'MBTO': 'Milton-Bowman Tunnel Outlet (South Portal)', + 'ROLK': 'Rollins Reservoir Near Colfax, Ca', + 'SWML': 'Sawmill Lake Near Graniteville, Ca', + 'WLSN': 'Wilson Creek near Sierra City'} + self.assertEqual(result, expected_dict) + + def test_get_issue_date(self): + result = nid.get_issue_date() + self.assertTrue(isinstance(result, dt.datetime)) + self.assertLess(result, dt.datetime.now()) + + def test_get_site_files(self): + site = 'DC140' + result = nid.get_site_files('DC140') + self.assertEqual(sorted(result), [f'{site}.adesc.pdf', + f'{site}.csv_flow.csv', + f'{site}.plot_flow.png', + f'{site}.usday_daily_flow.txt']) + + def test_get_site_metric(self): + self.assertEqual(nid.get_site_metric('BR334', interval='daily'), 'flow') + + def test_get_station_url(self): + self.assertEqual(nid.get_station_url('ROLK', metric='index', interval=None), + 'https://river-lake.nidwater.com/hyquick/ROLK/index.htm') + + self.assertEqual(nid.get_station_url('ROLK', metric='flow', interval='daily'), + 'https://river-lake.nidwater.com/hyquick/ROLK/ROLK.usday_daily_flow.txt') + + self.assertEqual(nid.get_station_url('ROLK', metric='flow', interval='hourly'), + 'https://river-lake.nidwater.com/hyquick/ROLK/ROLK.csv_flow.csv') + + def test_get_daily_data(self): + result = nid.get_daily_data('DC900', json_compatible=False) + year = result['info']['year'] + self.assertEqual(result['data'].head(4).index.strftime('%Y-%m-%d').tolist(), + [f'{year}-01-01', f'{year}-01-02', f'{year}-01-03', f'{year}-01-04']) + + def test_get_daily_meta(self): + url = 'https://river-lake.nidwater.com/hyquick/DC140/DC140.usday_daily_flow.txt' + result = nid.get_daily_meta(url=url, content=None) + self.assertEqual(result['Site'], 'DC140 Tunnel Canal at Head') + self.assertEqual(result['USGS #'], 'NO') + 
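+        # keys such as 'Site' and 'USGS #' come straight from the 'Key: Value'
+        # header lines of the USDAY text report (the same header style shown in
+        # sample_daily_data above); a minimal sketch of that style of parsing,
+        # assuming nothing about the actual nid.get_daily_meta internals (the
+        # 'version' token instead comes from the report banner line):
+        #     meta = {k.strip(): v.strip() for k, v in
+        #             (line.split(':', 1) for line in header_lines if ':' in line)}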
self.assertEqual(result['version'], 'USDAY V123') + + def test_get_hourly_data(self): + result = nid.get_hourly_data('WLSN', json_compatible=False) + sample = result['data'].head() + self.assertEqual(sample.index.strftime('%Y-%m-%d %H:%M:%S').tolist(), + ['2022-01-01 01:00:00', + '2022-01-01 02:00:00', + '2022-01-01 03:00:00', + '2022-01-01 04:00:00', + '2022-01-01 05:00:00']) + self.assertEqual(sample['Amount Diverted (AF)'].tolist(), [0.15, 0.15, 0.15, 0.15, 0.15]) + + def test_parse_qualifiers(self): + series = pd.Series(data=['Qualities:', + '2 - Good quality edited data', + '22 - Raw Satellite Data', + '28 - Radio data', + '255 - No data exists'], + name='Site Information') + self.assertEqual(nid.parse_qualifiers(series), + {'2': 'Good quality edited data', + '22': 'Raw Satellite Data', + '28': 'Radio data', + '255': 'No data exists'}) + + def test_serialize(self): + df = pd.DataFrame(index=pd.date_range('2020-12-01', '2020-12-03', freq='D'), + data={'VALUE': [42] * 3}) + self.assertEqual(nid.serialize(df.copy(), day_format='%Y-%m-%d'), + {'VALUE': {'2020-12-01 00:00': 42, '2020-12-02 00:00': 42, '2020-12-03 00:00': 42}}) + self.assertEqual(nid.serialize(df.copy(), day_format='%Y-%-m-%-d'), + {'VALUE': {'2020-12-1 00:00': 42, '2020-12-2 00:00': 42, '2020-12-3 00:00': 42}}) + + +if __name__ == '__main__': + unittest.main() diff --git a/collect/tests/test_usace.py b/collect/tests/test_usace.py new file mode 100644 index 0000000..5fbe4c6 --- /dev/null +++ b/collect/tests/test_usace.py @@ -0,0 +1,65 @@ +""" +collect.tests.test_basics +============================================================ +initial test suite for collect data access and utility functions; note: these tests require internet connection +""" +# -*- coding: utf-8 -*- +import datetime as dt +import unittest +import unittest.mock +from collect.usace import wcds + + +class TestUSACE(unittest.TestCase): + + def test_get_water_year_data(self): + result = wcds.get_water_year_data('buc', 2021, interval='d') + self.assertEqual(result['data'].shape, (397, 16)) + + sample = result['data'].head(4) + self.assertEqual(result['data'].head(4)['Top of Conservation (ac-ft)'].tolist(), + [149521.45, 149042.90, 148564.35, 148085.80]) + + # does not include timezone handling + self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].head(4).index.tolist())), + ['2020-08-31 00:00:00', + '2020-09-01 00:00:00', + '2020-09-02 00:00:00', + '2020-09-03 00:00:00']) + + # does not include timezone handling + self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].tail(4).index.tolist())), + ['2021-09-28 00:00:00', + '2021-09-29 00:00:00', + '2021-09-30 00:00:00', + '2021-10-01 00:00:00']) + + def test_get_data(self): + result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') + self.assertEqual(result['data'].shape, (398, 16)) + self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0]) + + def test_get_wcds_reservoirs(self): + """ + show that 35 reservoirs exist in the internal collect record for WCDS reservoirs + """ + self.assertEqual(wcds.get_wcds_reservoirs().shape[0], 35) + + def test_get_wcds_data(self): + result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') + self.assertEqual(result['data'].shape, (398, 16)) + self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0]) + + def test_get_release_report(self): + 
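+        # the release report is expected to come back in the {'info': ...,
+        # 'data': ...} envelope used throughout this suite; a sketch of the
+        # assumed shape, inferred from the assertions below rather than from
+        # any documented schema:
+        #     {'info': {'units': 'cfs', ...}, 'data': <pandas.DataFrame>}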
self.assertEqual(wcds.get_release_report('buc')['info']['units'], 'cfs') + self.assertGreater(wcds.get_release_report('buc')['data'].shape[0], 0) + + def test_get_reservoir_metadata(self): + result = wcds.get_reservoir_metadata('nhg', 2022, interval='d') + self.assertEqual(int(result['gross pool (stor)']), 317100) + self.assertEqual(int(result['gross pool (elev)']), 713) + self.assertTrue('Precip @ Dam (in; elev 712 ft)' in result['data headers']) + + +if __name__ == '__main__': + unittest.main() diff --git a/collect/tests/test_usgs.py b/collect/tests/test_usgs.py new file mode 100644 index 0000000..50015c4 --- /dev/null +++ b/collect/tests/test_usgs.py @@ -0,0 +1,46 @@ +""" +collect.tests.test_basics +============================================================ +initial test suite for collect data access and utility functions; note: these tests require internet connection +""" +# -*- coding: utf-8 -*- +import datetime as dt +import unittest +import unittest.mock +from collect import usgs + + +class TestUSGS(unittest.TestCase): + + def test_get_query_url(self): + url = usgs.get_query_url(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), 'instantaneous') + expected_url = '&'.join(['https://waterservices.usgs.gov/nwis/iv/?format=json', + 'sites=11418500', + 'startDT=2023-01-01T00:00:00', + 'endDT=2023-01-05T00:00:00', + 'parameterCd=00060', + 'siteStatus=all']) + self.assertEqual(url, expected_url) + + def test_get_data(self): + result = usgs.get_data(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), interval='daily') + self.assertEqual(result['data']['00060'].tolist(), [1280.0, 341.0, 351.0, 260.0, 1790.0]) + self.assertEqual(result['data'].index.strftime('%Y-%m-%d').tolist(), + ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']) + + def test_get_usgs_data(self): + result = usgs.get_usgs_data(11418500, '00060', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 5), interval='daily') + self.assertEqual(result['data']['00060'].tolist(), [1280.0, 341.0, 351.0, 260.0, 1790.0]) + self.assertEqual(result['data'].index.strftime('%Y-%m-%d').tolist(), + ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05']) + + def test_get_peak_streamflow(self): + result = usgs.get_peak_streamflow(11418500)['data'][['peak_va']] + self.assertEqual(result.head()['peak_va'].tolist(), + ['14000', '6260', '7520', '10800', '2400']) + self.assertEqual(result.head().index.strftime('%Y-%m-%d').tolist(), + ['1928-02-29', '1936-02-21', '1937-02-04', '1937-12-11', '1939-03-08']) + + +if __name__ == '__main__': + unittest.main() diff --git a/collect/tests/test_utils.py b/collect/tests/test_utils.py new file mode 100644 index 0000000..51bb843 --- /dev/null +++ b/collect/tests/test_utils.py @@ -0,0 +1,33 @@ +""" +collect.tests.test_basics +============================================================ +initial test suite for collect data access and utility functions; note: these tests require internet connection +""" +# -*- coding: utf-8 -*- +import datetime as dt +import io +import os +import textwrap +import unittest +import unittest.mock +from collect import utils + + +class TestUtils(unittest.TestCase): + + # def test_get_session_response(self): + # utils.get_session_response(url) + + # def test_get_web_status(self): + # utils.get_web_status(url) + + # def test_clean_fixed_width_headers(self): + # utils.clean_fixed_width_headers(columns) + + def test_get_water_year(self): + self.assertEqual(utils.get_water_year(dt.datetime(2023, 5, 12)), 2023) + 
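+        # the two checks in this test imply the Oct-Sep water year convention,
+        # labeled by the calendar year in which the water year ends; a hedged
+        # equivalence check of that arithmetic (an illustration of the
+        # convention, not necessarily how utils.get_water_year is implemented):
+        for date in (dt.datetime(2023, 5, 12), dt.datetime(2023, 11, 12)):
+            self.assertEqual(utils.get_water_year(date), date.year + int(date.month >= 10))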
self.assertEqual(utils.get_water_year(dt.datetime(2023, 11, 12)), 2024) + + +if __name__ == '__main__': + unittest.main() From 97915dbdeaf6fef933c4b2effa4c6da4e559079e Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 28 Nov 2023 14:26:58 -0800 Subject: [PATCH 10/36] Update cvo tests; update area target for certain report/date combinations --- collect/cvo/cvo.py | 12 ++- collect/tests/test_cvo.py | 205 +++++++++++++++++++++++++++++++++----- 2 files changed, 186 insertions(+), 31 deletions(-) diff --git a/collect/cvo/cvo.py b/collect/cvo/cvo.py index b355174..4372001 100644 --- a/collect/cvo/cvo.py +++ b/collect/cvo/cvo.py @@ -75,14 +75,17 @@ def get_area(date_structure, report_type): if report_date <= dt.date(2010, 12, 1): return None - # provide pdf target area - # dates of specific changes to pdf sizing + # provide pdf target area that is date-specific if (report_date.strftime('%Y-%m') == dt.date.today().strftime('%Y-%m') or (dt.date(2020, 1, 1) <= report_date <= dt.date(2020, 8, 1)) or (dt.date(2019, 3, 1) <= report_date <= dt.date(2019, 8, 1)) + or (dt.date(2022, 1, 1) <= report_date <= dt.date(2022, 4, 1)) or (dt.date(2022, 6, 1) <= report_date <= dt.date.today())): area = [290.19, 20.76, 750.78, 1300.67] + elif report_date == dt.date(2022, 5, 1): + area = [290.19, 20.76, 550.78, 1300.67] + elif dt.date(2010, 12, 1) < report_date <= dt.date(2017, 1, 1): # Weird date where pdf gets slightly longer # Other PDFs are smaller than the usual size @@ -153,7 +156,7 @@ def get_area(date_structure, report_type): if report_type == 'shadop': # set the default bottom boundary for tabula read_pdf function - area = [140, 30, 460, 540] + area = [140, 30, 700, 540] # set the bottom boundary for tabula read_pdf function for February months if date_structure.month == 2: @@ -361,7 +364,8 @@ def get_date_published(url, date_structure, report_type): # check that a response is provided if len(pages) > 0: if len(pages[0].values) > 0: - date_published = dateutil.parser.parse(pages[0].values.tolist()[-1][-1]).date() + date_text = pages[0].values.tolist()[-1][-1].replace('Run Date:', '') + date_published = dateutil.parser.parse(date_text).date() # alernate report formats elif url.endswith('.prn') or url.endswith('.txt'): diff --git a/collect/tests/test_cvo.py b/collect/tests/test_cvo.py index 900d495..43c0d86 100644 --- a/collect/tests/test_cvo.py +++ b/collect/tests/test_cvo.py @@ -19,53 +19,204 @@ class TestCVO(unittest.TestCase): - def test(self): - pass - - # prn test - result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2011, 3, 31), 'doutdly') - - # pdf test - result = cvo.get_data(dt.date(2013, 12, 1), dt.date(2014, 1, 31), 'doutdly') - result = cvo.get_data(dt.date(2000, 2, 1), dt.date(2023, 5, 1), 'shafln') - result = cvo.get_data(dt.date(2012, 6, 1), dt.date(2013, 12, 31), 'slunit') - result = cvo.get_data(dt.date(2020, 6, 1), dt.date(2021, 1, 1), 'fedslu') - result = cvo.get_data(dt.date(2021, 1, 10), dt.date.now(), 'shadop') - result = cvo.get_data(dt.date(2023, 5, 1), dt.date.now(), 'kesdop') - def test_get_area(self): - cvo.get_area(date_structure, report_type) + """ + demonstrate that get_area produces expected results for all reports for one possible date + """ + self.assertEqual(cvo.get_area(dt.date(2013, 12, 1), 'doutdly'), [151.19, 20.76, 390, 900.67]) + self.assertEqual(cvo.get_area(dt.date(2013, 12, 1), 'fedslu'), [140, 30, 500, 700]) + self.assertEqual(cvo.get_area(dt.date(2013, 12, 1), 'kesdop'), [145, 30, 465, 881]) + self.assertEqual(cvo.get_area(dt.date(2013, 12, 1), 'shadop'), 
[140, 30, 700, 540]) + self.assertEqual(cvo.get_area(dt.date(2013, 12, 1), 'shafln'), [140, 30, 445, 540]) + self.assertEqual(cvo.get_area(dt.date(2013, 12, 1), 'slunit'), [120, 20, 480, 820]) def test_get_data(self): - cvo.get_data(start, end, report_type) + """ + initial tests to demonstrate retrieving data spanning multiple PDF reports to build a timeseries record + """ + result = cvo.get_data(dt.date(2023, 6, 1), dt.date(2023, 8, 31), 'shadop') + self.assertEqual(result['data'].sum()['ELEV']['ELEV']['ELEV'], 96536.34) + self.assertEqual(result['data'].shape, (92, 11)) def test_get_date_published(self): - cvo.get_date_published(url, date_structure, report_type) + """ + test that date published can be extracted from a past report in the archive + """ + url = cvo.get_url(dt.date(2022, 2, 15), 'shadop') + result = cvo.get_date_published(url, dt.date(2022, 2, 15), 'shadop') + self.assertEqual(result.strftime('%Y-%m-%d'), '2023-04-19') + self.assertTrue(isinstance(result, dt.date)) def test_get_report_columns(self): - cvo.get_report_columns(report_type, date_structure, expected_length=None, default=False) + """ + demonstration of expected behavior for get_report_columns with shafln report type + """ + expected_result = ( + ('Day', ''), + ('Storage - A.F.', 'Lake Britton'), + ('Storage - A.F.', 'McCloud Div'), + ('Storage - A.F.', 'Iron Canyon'), + ('Storage - A.F.', 'Pit 6'), + ('Storage - A.F.', 'Pit 7'), + ('Reservoir Total', 'Reservoir Total'), + ('Change', 'A.F.'), + ('Change', 'C.F.S.'), + ('Shasta Inflow C.F.S.', 'Shasta Inflow C.F.S.'), + ('Natural River C.F.S.', 'Natural River C.F.S.'), + ('Accum * Full Natural 1000 A.F.', 'Accum * Full Natural 1000 A.F.') + ) + self.assertEqual(cvo.get_report_columns('shafln', dt.date.today(), expected_length=None, default=False), + expected_result) def test_get_report(self): - cvo.get_report(date_structure, report_type) + """ + test demonstrating expected behavior for delta daily outflow report retrieval for a particular date (May 2022) + """ + result = cvo.get_report(dt.date(2022, 5, 1), 'doutdly') + sample = result['data'].head()['Outflow Index']['Monthly Avg'] + self.assertEqual(sample.index.strftime('%Y-%m-%d').tolist(), + ['2022-05-14', '2022-05-15', '2022-05-16', '2022-05-17', '2022-05-18']) + self.assertEqual(sample.values.tolist(), + [4473.0, 4540.0, 4557.0, 4606.0, 4640.0]) def test_get_title(self): - cvo.get_title(report_type) + """ + test that the correct title is provided for each supported report type + """ + for report_type, expected_title in [ + ('doutdly', 'U.S. 
Bureau of Reclamation - Central Valley Operations Office Delta Outflow Computation'), + ('fedslu', 'San Luis Reservoir Federal Daily Operations'), + ('kesdop', 'Kesdop Reservoir Daily Operations'), + ('shadop', 'Shadop Reservoir Daily Operations'), + ('shafln', 'Shasta Reservoir Daily Operations'), + ('slunit', 'Federal-State Operations, San Luis Unit') + ]: + self.assertEqual(cvo.get_title(report_type), expected_title) + + @unittest.mock.patch('collect.cvo.dt.date') + def test_get_url_today(self, mock_date): + """ + test that the correct url is returned if the provided date_structure is mocked to represent "today" + """ + mock_date.today.return_value = dt.date(2023, 11, 1) + expected_url = 'https://www.usbr.gov/mp/cvo/vungvari/fedslu.pdf' + self.assertEqual(cvo.get_url(dt.date(2023, 11, 1), 'fedslu'), expected_url) def test_get_url(self): - cvo.get_url(date_structure, report_type) + """ + test that the correct url is returned for a report with either date or datetime input that is not "today" + """ + expected_url = 'https://www.usbr.gov/mp/cvo/vungvari/fedslu0120.pdf' + self.assertEqual(cvo.get_url(dt.date(2020, 1, 1), 'fedslu'), expected_url) + self.assertEqual(cvo.get_url(dt.datetime(2020, 1, 1), 'fedslu'), expected_url) def test_months_between(self): - cvo.months_between(start_date, end_date) + """ + test that the generator yields the appropriate sequence of months + """ + self.assertEqual(list(cvo.months_between(dt.datetime(2023, 1, 1), dt.datetime(2023, 4, 1))), + [dt.date(2023, 1, 1), dt.date(2023, 2, 1), dt.date(2023, 3, 1), dt.date(2023, 4, 1)]) + self.assertEqual(list(cvo.months_between(dt.date(2023, 1, 1), dt.date(2023, 4, 1))), + [dt.date(2023, 1, 1), dt.date(2023, 2, 1), dt.date(2023, 3, 1), dt.date(2023, 4, 1)]) def test_doutdly_data_cleaner(self): - cvo.doutdly_data_cleaner(content, report_type, date_structure) + content = io.StringIO(textwrap.dedent("""\ + 02/01/20,"21,152",210.0,93.0,579.0,"2,058","2,090","2,055","24,092",900.0,"3,696","2,617",186.0,4.0,45.0,"6,540","6,342","16,652","17,092","16,652",26%,24%,30% + 02/02/20,"19,222",217.0,84.0,536.0,"2,055","2,074","2,040","22,114",900.0,"3,691","2,612",168.0,5.0,58.0,"6,524","6,325","14,690","17,310","15,671",28%,26%,29% + 02/03/20,"17,787",224.0,76.0,523.0,"2,025","2,062","2,038","20,635",900.0,"3,694","2,626",136.0,4.0,69.0,"6,520","6,312","13,215","17,168","14,852",31%,28%,29% + 02/04/20,"17,439",231.0,70.0,515.0,"2,034","2,083","2,090","20,289",900.0,"3,700","2,621",153.0,4.0,62.0,"6,531","6,314","12,858","16,839","14,354",31%,30%,29% + 02/05/20,"16,595",239.0,65.0,501.0,"2,245","2,191","2,240","19,645",900.0,"2,696","3,478",154.0,3.0,58.0,"6,384","6,271","12,361","15,842","13,955",31%,31%,29% + 02/06/20,"16,158",246.0,63.0,497.0,"2,840","2,380","2,434","19,804",900.0,"2,896","3,480",126.0,4.0,59.0,"6,558","6,290","12,346","14,560","13,687",32%,32%,29% + 02/07/20,"15,874",253.0,60.0,486.0,"3,403","2,550","2,550","20,076",900.0,"3,393","3,555",128.0,12.0,66.0,"7,130","6,499","12,046","13,452","13,452",35%,33%,30% + 02/08/20,"15,134",260.0,57.0,477.0,"3,248","2,658","2,582","19,176",900.0,"2,989","3,597",104.0,10.0,79.0,"6,758","6,637","11,518","12,719","13,211",34%,34%,30% + 02/09/20,"14,607",260.0,54.0,469.0,"2,809","2,720","2,569","18,199",900.0,"2,796","2,636",108.0,12.0,57.0,"5,585","6,322","11,714","12,294","13,044",30%,33%,29% + 02/10/20,"14,662",260.0,51.0,463.0,"2,464","2,740","2,529","17,900",900.0,"2,798","2,629",104.0,12.0,128.0,"5,647","5,815","11,353","12,028","12,875",30%,31%,27% + 
02/11/20,"13,321",260.0,47.0,455.0,"2,171","2,698","2,477","16,254",850.0,"1,193","2,645",110.0,12.0,69.0,"4,005","4,899","11,399","11,819","12,741",24%,28%,23% + 02/12/20,"13,006",260.0,44.0,450.0,"1,952","2,555","2,424","15,712",850.0,"1,088","1,845",113.0,20.0,73.0,"3,099","4,066","11,763","11,734","12,659",19%,24%,20% + 02/13/20,"12,921",260.0,43.0,443.0,"1,841","2,315","2,370","15,508",850.0,"1,992",879,120.0,13.0,72.0,"3,051","3,214","11,607","11,628","12,579",18%,20%,16% + 02/14/20,"12,529",260.0,41.0,438.0,"1,720","2,083","2,316","14,988",850.0,"1,486",876,105.0,21.0,69.0,"2,516","2,722","11,622","11,568","12,510",16%,18%,14% + 02/15/20,"12,749",260.0,41.0,435.0,"1,623","1,896","2,262","15,108",850.0,"1,486",876,111.0,16.0,67.0,"2,524","2,532","11,734","11,599","12,458",16%,17%,14% + 02/16/20,"13,044",260.0,41.0,433.0,"1,500","1,750","2,211","15,278",850.0,"1,690",876,109.0,16.0,71.0,"2,730","2,430","11,698","11,597","12,411",17%,16%,14% + 02/17/20,"12,770",260.0,41.0,431.0,"1,444","1,643","2,164","14,946",850.0,"1,491",877,111.0,13.0,68.0,"2,533","2,432","11,563","11,626","12,361",16%,16%,14% + 02/18/20,"12,694",260.0,40.0,429.0,"1,422","1,563","2,122","14,845",850.0,"1,399",875,110.0,6.0,74.0,"2,452","2,403","11,543","11,647","12,316",15%,16%,14% + 02/19/20,"11,719",260.0,39.0,425.0,"1,394","1,498","2,083","13,837",850.0,692,878,112.0,16.0,77.0,"1,743","2,071","11,244","11,573","12,259",11%,14%,12% + 02/20/20,"11,472",260.0,38.0,423.0,"1,384","1,452","2,049","13,577",850.0,0,877,112.0,19.0,79.0,"1,050","1,574","11,677","11,583","12,230",6%,11%,10% + 02/21/20,"11,312",260.0,37.0,421.0,"1,397","1,498","2,044","13,427",850.0,698,4,109.0,15.0,65.0,860,"1,049","11,717","11,596","12,206",5%,8%,7% + 02/22/20,"11,089",260.0,37.0,415.0,"1,943","1,601","2,052","13,744",900.0,493,870,105.0,21.0,73.0,"1,520",980,"11,324","11,538","12,166",10%,7%,6% + 02/23/20,"10,725",260.0,40.0,412.0,"2,222","1,760","2,074","13,659",900.0,495,873,116.0,18.0,72.0,"1,538","1,144","11,221","11,470","12,124",10%,8%,8% + 02/24/20,"11,437",260.0,39.0,412.0,"2,556","1,944","2,100","14,704",900.0,792,871,116.0,27.0,74.0,"1,826","1,464","11,978","11,529","12,118",11%,10%,10% + 02/25/20,"11,846",260.0,35.0,419.0,"2,709","2,137","2,126","15,269",900.0,"1,689",870,120.0,43.0,80.0,"2,716","1,863","11,653","11,545","12,100",16%,13%,13% + 02/26/20,"11,768",260.0,33.0,375.0,"2,751","2,330","2,149","15,187",900.0,"1,773",869,120.0,42.0,78.0,"2,798","2,288","11,489","11,580","12,076",17%,15%,15% + 02/27/20,"11,975",260.0,33.0,393.0,"2,732","2,527","2,173","15,393",900.0,"1,797",870,118.0,35.0,69.0,"2,820","2,623","11,673","11,579","12,061",17%,17%,18% + 02/28/20,"11,881",260.0,31.0,299.0,"2,777","2,646","2,194","15,248",900.0,"1,791",876,121.0,32.0,68.0,"2,824","2,659","11,524","11,552","12,042",17%,17%,18% + 02/29/20,"11,767",260.0,31.0,245.0,"2,778","2,730","2,810","15,081",900.0,"1,491",875,123.0,19.0,80.0,"2,550","2,567","11,631","11,596","11,631",16%,17%,17% + -,,,,,,,,,,,,,,,,,,,,,, + -,,,,,,,,,,,,,,,,,,,,,, + """)) + result = cvo.doutdly_data_cleaner([pd.read_csv(content, header=None, index_col=None)], + 'doutdly', + dt.date(2020, 2, 1)) + self.assertEqual(result.head()['Delta Inflow']['Yolo + Misc prev dy'].tolist(), + [93.0, 84.0, 76.0, 70.0, 65.0]) def test_load_pdf_to_dataframe(self): - cvo.load_pdf_to_dataframe(content, date_structure, report_type, to_csv=False) - - def test_download_files(self): - cvo.download_files(start, end, report_type, destination='.') + """ + test that the load_pdf_to_dataframe processing 
function works with predictable list of dataframe input that would + be produced by the tabula-py scraper for a particular date + """ + content = io.StringIO(textwrap.dedent("""\ + ,,STORAGE,COMPUTED*,SPRING,SHASTA,,EVAP + ,,ACRE-FEET,INFLOW,CR. P. P.,RELEASE,RELEASE - C.F.S.,C. F. S. + DAY,ELEV,RES. CHANGE,C.F.S.,RELEASE,C. F. S.,POWER SPILL FISHTRAP,(1) + ,,"20,616",,,,, + 1,580.93,"20,060 -556","9,900",699,"8,238","9,105 1,070 0",5 + 2,582.79,"21,158 1,098","10,650",75,"8,769","9,094 1,002 0",0 + 3,582.32,"20,877 -281","9,983",505,"7,740","9,097 1,027 0",1 + 4,582.60,"21,044 167","12,108",190,"10,372","9,106 2,914 0",4 + 5,582.72,"21,116 72","13,012",655,"10,366","9,252 3,723 0",1 + 6,583.14,"21,368 252","13,201",434,"10,769","8,018 5,056 0",0 + 7,581.28,"20,264 -1,104","12,414",58,"10,454","6,756 6,215 0",0 + 8,582.73,"21,122 858","13,471",85,"10,979","8,808 4,230 0",0 + 9,581.47,"20,375 -747","12,711",64,"10,474","8,157 4,931 0",0 + 10,582.80,"21,164 789","13,313",61,"11,308","7,250 5,663 0",2 + 11,579.96,"19,502 -1,662","12,256",273,"9,919","8,265 4,826 0",3 + 12,582.47,"20,967 1,465","13,778",37,"11,382","12,974 60 0",5 + 13,580.84,"20,008 -959","12,659",37,"10,389","13,137 0 0",5 + 14,581.51,"20,399 391","13,281",37,"10,919","13,078 0 0",6 + 15,583.80,"21,769 1,370","13,702",37,"11,282","13,005 0 0",6 + 16,585.63,"22,903 1,134","13,628",37,"11,254","13,050 0 0",6 + 17,585.42,"22,771 -132","13,030",319,"10,287","13,091 0 0",6 + 18,581.17,"20,200 -2,571","11,762",124,"9,417","13,053 0 0",5 + 19,582.33,"20,883 683","13,366",669,"10,316","13,017 0 0",5 + 20,581.75,"20,540 -343","12,837",47,"10,576","13,003 0 0",7 + 21,580.39,"19,749 -791","12,606",115,"10,248","13,000 0 0",5 + 22,580.09,"19,576 -173","13,013",540,"10,170","13,094 0 0",6 + 23,580.33,"19,714 138","11,344",144,"9,238","11,269 0 0",5 + 24,579.42,"19,198 -516","10,750",154,"8,667","11,006 0 0",4 + 25,581.64,"20,475 1,277","11,661",446,"9,179","11,011 0 0",6 + 26,580.85,"20,014 -461","10,780",111,"8,821","11,008 0 0",4 + 27,581.52,"20,405 391","11,191",37,"9,354","10,967 23 0",4 + 28,581.68,"20,498 93","10,997",53,"9,210","10,947 0 0",3 + 29,582.78,"21,152 654","10,518",37,"8,615","10,168 16 0",4 + 30,582.09,"20,740 -412","9,878",355,"7,832","10,081 0 0",5 + 31,581.46,"20,370 -370","9,898",105,"7,948","10,080 0 0",5 + TOTALS,,-246,"373,698","6,540","304,492","332,947 40,756 0",118 + """)) + result = cvo.load_pdf_to_dataframe([pd.read_csv(content, header=None, index_col=None)], + dt.date(2023, 5, 1), + 'kesdop') + self.assertEqual(result.tail()['RELEASE - C.F.S.']['POWER'].tolist(), + [10967.0, 10947.0, 10168.0, 10081.0, 10080.0]) + + def deferred_test_download_files(self): + """ + this test will eventually be implemented to check the appropriate creation of files from the downloading data + using + cvo.download_files(start, end, report_type, destination='.') + """ + return if __name__ == '__main__': unittest.main() + From 81bf081f46a820ca3e07950049628d276bbd48c7 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 28 Nov 2023 14:27:22 -0800 Subject: [PATCH 11/36] Update test_dwr --- collect/tests/test_dwr.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index d516ff9..5342a5c 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -11,7 +11,6 @@ import unittest import unittest.mock -from dotenv import load_dotenv import pandas as pd from collect.dwr import cdec @@ -20,13 +19,9 @@ from collect.dwr import 
b120 from collect.dwr import swp -from collect import alert -from collect import cnrfc -from collect import cvo -from collect import nid -from collect import usgs -from collect import utils -from collect.usace import wcds + +class TestB120(unittest.TestCase): + pass class TestCASGEM(unittest.TestCase): @@ -71,9 +66,9 @@ def test_get_casgem_data(self): class TestCAWDL(unittest.TestCase): - # """ - # dwr.cawdl module references inactive API; CAWDL tools must be updated once CNRA/DWR completes web transition - # """ + """ + dwr.cawdl module references inactive API; CAWDL tools must be updated once CNRA/DWR completes web transition + """ def test_get_cawdl_data(self): cawdl.get_cawdl_data('17202') @@ -180,5 +175,9 @@ def test_get_oco_tabular_data(self): swp.get_oco_tabular_data() +class TestWSI(unittest.TestCase): + pass + + if __name__ == '__main__': unittest.main() From 446694c7ebbacadb9de9eb4f4ab9d2e979ea76e7 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 28 Nov 2023 14:39:34 -0800 Subject: [PATCH 12/36] Add test for each utility --- collect/tests/test_utils.py | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/collect/tests/test_utils.py b/collect/tests/test_utils.py index 51bb843..583e5b5 100644 --- a/collect/tests/test_utils.py +++ b/collect/tests/test_utils.py @@ -10,19 +10,44 @@ import textwrap import unittest import unittest.mock +import pandas as pd +import requests from collect import utils class TestUtils(unittest.TestCase): - # def test_get_session_response(self): - # utils.get_session_response(url) - - # def test_get_web_status(self): - # utils.get_web_status(url) - - # def test_clean_fixed_width_headers(self): - # utils.clean_fixed_width_headers(columns) + def test_get_session_response(self): + result = utils.get_session_response('https://example.com') + self.assertTrue('Example Domain' in result.text) + self.assertTrue(isinstance(result, requests.models.Response)) + self.assertEqual(result.status_code, 200) + + def test_get_web_status(self): + self.assertTrue(utils.get_web_status('https://example.com')) + + def test_clean_fixed_width_headers(self): + test_headers = [ + ['Unnamed: 0_level_0', '90%', '75%', '50%', '25%', '10%'] + [f'Unnamed: {i}_level_0' for i in range (6, 10)], + ['Unnamed: 0_level_1'] + ['Exceedance'] * 5 + ['NWS', 'Raw Obs', 'Raw Avg', 'Unnamed: 9_level_1'], + ['Unnamed: 0_level_2'] + ['Apr-Jul'] * 8 + ['Raw Daily'], + ['Date'] + ['Forecast'] * 3 + ['Foreacast', 'Foreacst', 'Forecast', 'To Date', 'To Date', 'Observation'], + ['(mm/dd/YYYY)'] + ['(kaf)'] * 9 + ] + columns = pd.MultiIndex.from_tuples(zip(*test_headers)) + + expected_columns = ['Date (mm/dd/YYYY)', + '90% Exceedance Apr-Jul Forecast (kaf)', + '75% Exceedance Apr-Jul Forecast (kaf)', + '50% Exceedance Apr-Jul Forecast (kaf)', + '25% Exceedance Apr-Jul Foreacast (kaf)', + '10% Exceedance Apr-Jul Foreacst (kaf)', + 'NWS Apr-Jul Forecast (kaf)', + 'Raw Obs Apr-Jul To Date (kaf)', + 'Raw Avg Apr-Jul To Date (kaf)', + 'Raw Daily Observation (kaf)'] + + self.assertEqual(utils.clean_fixed_width_headers(columns), expected_columns) def test_get_water_year(self): self.assertEqual(utils.get_water_year(dt.datetime(2023, 5, 12)), 2023) From 3f992b93b944f0bb44084ab35506b71e03e2e06b Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 28 Nov 2023 15:20:52 -0800 Subject: [PATCH 13/36] Extend CNRFC tests --- collect/cnrfc/gages.py | 2 +- collect/tests/test_cnrfc.py | 299 ++++++++++++++++++++---------------- 2 files changed, 165 insertions(+), 136 
deletions(-) diff --git a/collect/cnrfc/gages.py b/collect/cnrfc/gages.py index 4086046..1d6cac9 100644 --- a/collect/cnrfc/gages.py +++ b/collect/cnrfc/gages.py @@ -43,7 +43,7 @@ 'HLLC1SPL'] LOWERSACRAMENTO_GAGES = ['SAMC1', 'SACC1', 'VONC1', 'FMWC1', 'DRMC1', 'RCVC1', - 'FMWC1L', 'SACC1L', 'SAMC1L', 'NCOC1L'] + 'FMWC1L', 'SACC1L', 'SAMC1L', 'NCOC1', 'NCOC1L'] CENTRALCOAST_GAGES = ['LWDC1', 'SNRC1', 'NBYC1', 'NACC1', 'PRBC1', 'RDRC1', 'BSRC1', 'PIIC1', 'TESC1', 'HOSC1', 'PHOC1', 'AROC1', diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index 1ebf08d..ebc0aa9 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -96,172 +96,201 @@ def deterministic_frame(self): dtype={'GMT': str}).mul(1000) return self._deterministic_frame - def test_cnrfc_credentials(self): - """ - load sensitive info from .env file and test CNRFC credentials exist - """ - load_dotenv() - self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) - - def test_convert_date_columns(self): - """Ensure datetime data converted to string format""" - test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') - self.assertEqual(test_index.tolist()[0], '2019-03-30') - - def test_validate_duration(self): - """ - function to properly format/case hourly or daily durations - """ - duration = 'Hourly' - self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - - def test_validate_duration_invalid(self): - """ - test that invalid duration raises a ValueError - """ - bad_input = 'monthly' - self.assertRaises(ValueError, - cnrfc.cnrfc._validate_duration, - bad_input) - - def test_get_deterministic_forecast(self): - """ - Test that deterministic forecast start from Graphical_RVF page matches - CSV start of forecast - """ - cnrfc_id = 'FOLC1' - first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] - df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] - first_forecast_entry = df['forecast'].dropna().index.tolist()[0] - - # check that the date/time representation in the timestamp and datetime.datetime objects are the same - self.assertEqual(first_forecast_entry.year, first_ordinate.year) - self.assertEqual(first_forecast_entry.month, first_ordinate.month) - self.assertEqual(first_forecast_entry.day, first_ordinate.day) - self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) - self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) - - # for now, strip the local tzinfo from `first_ordinate` - self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) - - def test_get_deterministic_forecast_watershed(self): - """ - test watershed deterministic forecast download for North San Joaquin on a particular date - """ - df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] - self.assertEqual(df.head(20)['NHGC1'].values.tolist(), - self.deterministic_frame.head(20)['NHGC1'].values.tolist()) - - def test_get_water_year_trend_tabular(self): - """ - test watershed deterministic forecast download for North San Joaquin on a particular date - """ - df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] - self.assertEqual(df.shape, (365, 9)) - - # def test_get_seasonal_trend_tabular(self): - # cnrfc.get_seasonal_trend_tabular(cnrfc_id, water_year) - - # def test_get_water_year_trend_tabular(self): - # cnrfc.get_water_year_trend_tabular(cnrfc_id, water_year) + # def test_cnrfc_credentials(self): + # """ + # load 
sensitive info from .env file and test CNRFC credentials exist + # """ + # load_dotenv() + # self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) + + # def test_convert_date_columns(self): + # """Ensure datetime data converted to string format""" + # test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') + # self.assertEqual(test_index.tolist()[0], '2019-03-30') + + # def test_validate_duration(self): + # """ + # function to properly format/case hourly or daily durations + # """ + # duration = 'Hourly' + # self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') + + # def test_validate_duration_invalid(self): + # """ + # test that invalid duration raises a ValueError + # """ + # bad_input = 'monthly' + # self.assertRaises(ValueError, + # cnrfc.cnrfc._validate_duration, + # bad_input) # def test_get_deterministic_forecast(self): - # cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=False) + # """ + # Test that deterministic forecast start from Graphical_RVF page matches + # CSV start of forecast + # """ + # cnrfc_id = 'FOLC1' + # first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] + # df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] + # first_forecast_entry = df['forecast'].dropna().index.tolist()[0] + + # # check that the date/time representation in the timestamp and datetime.datetime objects are the same + # self.assertEqual(first_forecast_entry.year, first_ordinate.year) + # self.assertEqual(first_forecast_entry.month, first_ordinate.month) + # self.assertEqual(first_forecast_entry.day, first_ordinate.day) + # self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) + # self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) + + # # for now, strip the local tzinfo from `first_ordinate` + # self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) # def test_get_deterministic_forecast_watershed(self): - # cnrfc.get_deterministic_forecast_watershed(watershed, - # date_string, - # acre_feet=False, - # pdt_convert=False, - # as_pdt=False, - # cnrfc_id=None) - - # def test_get_forecast_meta_deterministic(self): - # cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=False) + # """ + # test watershed deterministic forecast download for North San Joaquin on a particular date; + # additional future tests to add coverage for arguments: + # - watershed + # - date_string + # - acre_feet=False + # - pdt_convert=False + # - as_pdt=False + # - cnrfc_id=None + # """ + # df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] + # self.assertEqual(df.head(20)['NHGC1'].values.tolist(), + # self.deterministic_frame.head(20)['NHGC1'].values.tolist()) + # self.assertIsNone(df.index.tzinfo) - # def test_get_ensemble_forecast(self): - # cnrfc.get_ensemble_forecast(cnrfc_id, duration, acre_feet=False, pdt_convert=False, as_pdt=False) - - # def test_get_ensemble_forecast_watershed(self): - # cnrfc.get_ensemble_forecast_watershed(watershed, - # duration, - # date_string, - # acre_feet=False, - # pdt_convert=False, - # as_pdt=False, - # cnrfc_id=None) - - # def test_download_watershed_file(self): - # cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) - - # def test_get_watershed_forecast_issue_time(self): - # cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) - - # def 
test_get_watershed(self): - # cnrfc.get_watershed(cnrfc_id) + # def test_get_water_year_trend_tabular(self): + # """ + # test water year trend tabular download for a past year for Folsom reservoir forecast point + # """ + # df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] + # self.assertEqual(df.shape, (365, 9)) - # def test_get_ensemble_first_forecast_ordinate(self): - # cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + # def test_get_seasonal_trend_tabular(self): + # """ + # test seasonal trend tabular download for a past year for Shasta reservoir forecast point + # """ + # df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] + # self.assertEqual(df.shape, (365, 10)) - # def test_get_ensemble_product_url(self): - # cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') + # def test_get_ensemble_forecast(self): + # """ + # test for current ensemble forecast file schema, using Vernalis forecast location + # """ + # result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) + # self.assertEqual(result['data'].shape, (721, 43)) + # self.assertIsNone(result['data'].index.tzinfo) + # self.assertEqual(result['info']['watershed'], 'SanJoaquin') + # self.assertEqual(result['info']['units'], 'cfs') # def test_get_ensemble_product_1(self): - # cnrfc.get_ensemble_product_1(cnrfc_id) - - # def test_get_ensemble_product_2(self): - # cnrfc.get_ensemble_product_2(cnrfc_id) + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') # def test_get_ensemble_product_3(self): - # cnrfc.get_ensemble_product_3(cnrfc_id) + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') # def test_get_ensemble_product_5(self): - # cnrfc.get_ensemble_product_5(cnrfc_id) - - # def test_get_ensemble_product_6(self): - # cnrfc.get_ensemble_product_6(cnrfc_id) - - # def test_get_ensemble_product_10(self): - # cnrfc.get_ensemble_product_10(cnrfc_id) + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') # def test_get_ensemble_product_11(self): - # cnrfc.get_ensemble_product_11(cnrfc_id) + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') # def test_get_ensemble_product_12(self): - # cnrfc.get_ensemble_product_12(cnrfc_id) + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') # def test_get_ensemble_product_13(self): - # cnrfc.get_ensemble_product_13(cnrfc_id) + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') # def test_get_data_report_part_8(self): - # cnrfc.get_data_report_part_8() + # self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) # def test_get_monthly_reservoir_storage_summary(self): - # cnrfc.get_monthly_reservoir_storage_summary() + # self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) - # def test_esp_trace_analysis_wrapper(self): - # cnrfc.esp_trace_analysis_wrapper() + # def test_get_rating_curve(self): + # """ + # example expected output from get_rating_curve method + # """ + # result = cnrfc.get_rating_curve('DCSC1') + # self.assertEqual(result['data'][0], (0.92, 0.45)) + # self.assertEqual(result['data'][-1], (15.0, 16300.0)) + # self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') - # def test__apply_conversions(self): - # cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) + # def test_get_watershed(self): + # """ + # example usage 
for looking up watershed group by forecast point ID + # """ + # self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento') - # def test__get_cnrfc_restricted_content(self): - # cnrfc._get_cnrfc_restricted_content(url) + # def test_get_forecast_meta_deterministic(self): + # """ + # test for predicted response with get_forecast_meta_deterministic for Oroville forecast point + # """ + # result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False) + # self.assertTrue(isinstance(result[0], (dt.date, dt.datetime))) + # self.assertTrue(isinstance(result[1], (dt.date, dt.datetime))) + # self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)') + # self.assertEqual(result[3], 'Impaired Inflows') + + def deferred_test_get_ensemble_forecast_watershed(self): + result = cnrfc.get_ensemble_forecast_watershed(watershed, + duration, + date_string, + acre_feet=False, + pdt_convert=False, + as_pdt=False, + cnrfc_id=None) + + def deferred_test_download_watershed_file(self): + result = cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + + def deferred_test_get_watershed_forecast_issue_time(self): + result = cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) + + def deferred_test_get_ensemble_first_forecast_ordinate(self): + result = cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + + def deferred_test_get_ensemble_product_url(self): + result = cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') + + def test_get_ensemble_product_2(self): + """ + test for the expected format of ensemble produce #2 + """ + result = cnrfc.get_ensemble_product_2('BDBC1') + self.assertEqual(result['info']['type'], 'Tabular 10-Day Streamflow Volume Accumulation') + self.assertEqual(result['info']['units'], 'TAF') + self.assertEqual(result['data'].shape, (6, 10)) + self.assertEqual(result['data'].index.tolist(), + ['10%', '25%', '50%(Median)', '75%', '90%', 'CNRFCDeterministic Forecast']) - # def test__get_forecast_csv(self): - # cnrfc._get_forecast_csv(url) + def deferred_test_get_ensemble_product_6(self): + result = cnrfc.get_ensemble_product_6(cnrfc_id) - # def test_get_forecast_csvdata(self): - # cnrfc.get_forecast_csvdata(url) + def deferred_test_get_ensemble_product_10(self): + result = cnrfc.get_ensemble_product_10(cnrfc_id) - # def test_get_rating_curve(self): - # cnrfc.get_rating_curve(cnrfc_id) + def deferred_test_esp_trace_analysis_wrapper(self): + result = cnrfc.esp_trace_analysis_wrapper() + + def deferred_test__apply_conversions(self): + result = cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) + + def deferred_test__get_cnrfc_restricted_content(self): + result = cnrfc._get_cnrfc_restricted_content(url) + + def deferred_test__get_forecast_csv(self): + result = cnrfc._get_forecast_csv(url) + + def deferred_test_get_forecast_csvdata(self): + result = cnrfc.get_forecast_csvdata(url) - # def test__default_date_string(self): - # cnrfc._default_date_string(date_string) + def deferred_test__default_date_string(self): + result = cnrfc._default_date_string(date_string) - # def test__parse_blue_table(self): - # cnrfc._parse_blue_table(table_soup) + def deferred_test__parse_blue_table(self): + result = cnrfc._parse_blue_table(table_soup) if __name__ == '__main__': From 27183bce7cda2d56cdfee4ce4043da043bb6b184 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 28 Nov 2023 20:03:37 -0800 Subject: [PATCH 14/36] Updates to cnrfc module 
and add tests --- collect/cnrfc/cnrfc.py | 66 ++++-- collect/tests/test_cnrfc.py | 458 +++++++++++++++++++++++------------- 2 files changed, 339 insertions(+), 185 deletions(-) diff --git a/collect/cnrfc/cnrfc.py b/collect/cnrfc/cnrfc.py index a1142d3..31f808e 100644 --- a/collect/cnrfc/cnrfc.py +++ b/collect/cnrfc/cnrfc.py @@ -17,7 +17,7 @@ from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry from collect.cnrfc.gages import * -from collect.utils.utils import clean_fixed_width_headers, get_web_status +from collect.utils.utils import clean_fixed_width_headers, get_web_status, get_session_response try: from zoneinfo import ZoneInfo @@ -574,38 +574,67 @@ def download_watershed_file(watershed, date_string, forecast_type, duration=None return path +def parse_forecast_archive_table(url): + """ + get the table of Forecast Group, Filename, Date/Time Last Modified and Size for deterministic and ensemble short + and long-range forecasts for watershed groups + + Arguments: + url (str): identifies the product page for watershed forecast products + Returns: + df (pandas.DataFrame): dataframe containing HTML table summarizing last forecast issuances for product page + """ + df = pd.read_html(get_session_response(url).text)[0] + + # extract the header row and assign as column names + df.columns = df.iloc[1,:] + + # drop the invalid headers and filter to relevant columns + df = df.drop([0, 1], axis=0).reindex()[['Forecast Group', 'Filename', 'Date/Time Last Modified', 'Size']] + + # limit frame to valid files containing size info + return df.loc[df['Size'].str.endswith('K')] + + def get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False): """ get "last modified" date/time stamp from CNRFC watershed ensemble product table + + Arguments: + duration (str): one of 'daily' or 'hourly' + watershed (str): the name of the watershed forecast group + date_string (None or str): None for the latest forecast product or the the YYYYMMDDHH-formatted date + deterministic (bool): flag for whether the watershed deterministic forecast is specified + Returns: + (datetime.datetime or None): the specified last modified date for the watershed product """ duration = _validate_duration(duration) + # store original date_string + _date_string = date_string + + # forecast datestamp prefix + date_string = _default_date_string(date_string) + + # do not return a datetime if the provided date_string is for a past forecast issuance (this is not stored on the + # CNRFC site) + if _date_string is not None and _date_string != _default_date_string(None): + return None + if duration == 'daily': - #" only on the 12" - date_string = date_string[:-2] + '12' url = 'https://www.cnrfc.noaa.gov/ensembleProductCSV.php' - file_name = '{0}_{1}_hefs_csv_{2}.zip' elif duration == 'hourly': url = 'https://www.cnrfc.noaa.gov/ensembleHourlyProductCSV.php' - file_name = '{0}_{1}_hefs_csv_{2}.zip' if deterministic: + if duration == 'daily': + raise ValueError('Long-range (daily) deterministic product does not exist.') url = 'https://www.cnrfc.noaa.gov/deterministicHourlyProductCSV.php' - file_name = '{0}_{1}_csv_export.zip' - - # forecast datestamp prefix - date_string = _default_date_string(date_string) - - # request table from ensemble product page and parse HTML - soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml') - for td in soup.find_all('td', {'class': 'table-listing-content'}): - if file_name.format(date_string, watershed, duration) in td.text: - issue_time 
= parser.parse(td.next_sibling.text).astimezone(PACIFIC) - return issue_time - return None - # raise ValueError('No valid issue time for URL.') + # extract last-modified details and filenames from forecast product zipfile table + table = parse_forecast_archive_table(url) + return parser.parse(table.loc[table['Forecast Group']==watershed, 'Date/Time Last Modified'].values[0]) def get_watershed(cnrfc_id): @@ -685,6 +714,7 @@ def get_ensemble_product_2(cnrfc_id): data_table = soup.find_all('table', {'style': 'standardTable'})[0] # parse Tabular 10-Day Streamflow Volume Accumulation (1000s of Acre-Feet) from table + print(data_table) df, notes = _parse_blue_table(data_table) df.set_index('Probability', inplace=True) diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index ebc0aa9..62de9ed 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -11,12 +11,17 @@ import unittest import unittest.mock +from bs4 import BeautifulSoup from dotenv import load_dotenv import pandas as pd from collect import cnrfc +def mocked_strftime(*args, **kwargs): + return '2023110112' + + class TestCNRFC(unittest.TestCase): @property @@ -96,163 +101,142 @@ def deterministic_frame(self): dtype={'GMT': str}).mul(1000) return self._deterministic_frame - # def test_cnrfc_credentials(self): - # """ - # load sensitive info from .env file and test CNRFC credentials exist - # """ - # load_dotenv() - # self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) - - # def test_convert_date_columns(self): - # """Ensure datetime data converted to string format""" - # test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') - # self.assertEqual(test_index.tolist()[0], '2019-03-30') - - # def test_validate_duration(self): - # """ - # function to properly format/case hourly or daily durations - # """ - # duration = 'Hourly' - # self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - - # def test_validate_duration_invalid(self): - # """ - # test that invalid duration raises a ValueError - # """ - # bad_input = 'monthly' - # self.assertRaises(ValueError, - # cnrfc.cnrfc._validate_duration, - # bad_input) - - # def test_get_deterministic_forecast(self): - # """ - # Test that deterministic forecast start from Graphical_RVF page matches - # CSV start of forecast - # """ - # cnrfc_id = 'FOLC1' - # first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] - # df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] - # first_forecast_entry = df['forecast'].dropna().index.tolist()[0] - - # # check that the date/time representation in the timestamp and datetime.datetime objects are the same - # self.assertEqual(first_forecast_entry.year, first_ordinate.year) - # self.assertEqual(first_forecast_entry.month, first_ordinate.month) - # self.assertEqual(first_forecast_entry.day, first_ordinate.day) - # self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) - # self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) - - # # for now, strip the local tzinfo from `first_ordinate` - # self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) - - # def test_get_deterministic_forecast_watershed(self): - # """ - # test watershed deterministic forecast download for North San Joaquin on a particular date; - # additional future tests to add coverage for arguments: - # - watershed - # - date_string - # - acre_feet=False - # - pdt_convert=False - # - 
as_pdt=False - # - cnrfc_id=None - # """ - # df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] - # self.assertEqual(df.head(20)['NHGC1'].values.tolist(), - # self.deterministic_frame.head(20)['NHGC1'].values.tolist()) - # self.assertIsNone(df.index.tzinfo) - - # def test_get_water_year_trend_tabular(self): - # """ - # test water year trend tabular download for a past year for Folsom reservoir forecast point - # """ - # df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] - # self.assertEqual(df.shape, (365, 9)) - - # def test_get_seasonal_trend_tabular(self): - # """ - # test seasonal trend tabular download for a past year for Shasta reservoir forecast point - # """ - # df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] - # self.assertEqual(df.shape, (365, 10)) - - # def test_get_ensemble_forecast(self): - # """ - # test for current ensemble forecast file schema, using Vernalis forecast location - # """ - # result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) - # self.assertEqual(result['data'].shape, (721, 43)) - # self.assertIsNone(result['data'].index.tzinfo) - # self.assertEqual(result['info']['watershed'], 'SanJoaquin') - # self.assertEqual(result['info']['units'], 'cfs') - - # def test_get_ensemble_product_1(self): - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') - - # def test_get_ensemble_product_3(self): - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') - - # def test_get_ensemble_product_5(self): - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') - - # def test_get_ensemble_product_11(self): - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') - - # def test_get_ensemble_product_12(self): - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') - - # def test_get_ensemble_product_13(self): - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') - - # def test_get_data_report_part_8(self): - # self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) - - # def test_get_monthly_reservoir_storage_summary(self): - # self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) - - # def test_get_rating_curve(self): - # """ - # example expected output from get_rating_curve method - # """ - # result = cnrfc.get_rating_curve('DCSC1') - # self.assertEqual(result['data'][0], (0.92, 0.45)) - # self.assertEqual(result['data'][-1], (15.0, 16300.0)) - # self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') - - # def test_get_watershed(self): - # """ - # example usage for looking up watershed group by forecast point ID - # """ - # self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento') - - # def test_get_forecast_meta_deterministic(self): - # """ - # test for predicted response with get_forecast_meta_deterministic for Oroville forecast point - # """ - # result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False) - # self.assertTrue(isinstance(result[0], (dt.date, dt.datetime))) - # self.assertTrue(isinstance(result[1], (dt.date, dt.datetime))) - # self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)') - # self.assertEqual(result[3], 'Impaired Inflows') + def test_cnrfc_credentials(self): + """ + load sensitive info from .env file and test CNRFC credentials exist + """ + 
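+        # these restricted-content tests assume a .env file (read by
+        # python-dotenv) defining the two variables checked below; placeholder
+        # values only, the real credentials are never committed:
+        #     CNRFC_USER=your_username
+        #     CNRFC_PASSWORD=your_password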
load_dotenv() + self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) - def deferred_test_get_ensemble_forecast_watershed(self): - result = cnrfc.get_ensemble_forecast_watershed(watershed, - duration, - date_string, - acre_feet=False, - pdt_convert=False, - as_pdt=False, - cnrfc_id=None) + def test_convert_date_columns(self): + """Ensure datetime data converted to string format""" + test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') + self.assertEqual(test_index.tolist()[0], '2019-03-30') - def deferred_test_download_watershed_file(self): - result = cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + def test_validate_duration(self): + """ + function to properly format/case hourly or daily durations + """ + duration = 'Hourly' + self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - def deferred_test_get_watershed_forecast_issue_time(self): - result = cnrfc.get_watershed_forecast_issue_time(duration, watershed, date_string=None, deterministic=False) + def test_validate_duration_invalid(self): + """ + test that invalid duration raises a ValueError + """ + bad_input = 'monthly' + self.assertRaises(ValueError, + cnrfc.cnrfc._validate_duration, + bad_input) - def deferred_test_get_ensemble_first_forecast_ordinate(self): - result = cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + def test_get_deterministic_forecast(self): + """ + Test that deterministic forecast start from Graphical_RVF page matches + CSV start of forecast + """ + cnrfc_id = 'FOLC1' + first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] + df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] + first_forecast_entry = df['forecast'].dropna().index.tolist()[0] + + # check that the date/time representation in the timestamp and datetime.datetime objects are the same + self.assertEqual(first_forecast_entry.year, first_ordinate.year) + self.assertEqual(first_forecast_entry.month, first_ordinate.month) + self.assertEqual(first_forecast_entry.day, first_ordinate.day) + self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) + self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) + + # for now, strip the local tzinfo from `first_ordinate` + self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) + + def test_get_deterministic_forecast_watershed(self): + """ + test watershed deterministic forecast download for North San Joaquin on a particular date; + additional future tests to add coverage for arguments: + - watershed + - date_string + - acre_feet=False + - pdt_convert=False + - as_pdt=False + - cnrfc_id=None + """ + df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] + self.assertEqual(df.head(20)['NHGC1'].values.tolist(), + self.deterministic_frame.head(20)['NHGC1'].values.tolist()) + self.assertIsNone(df.index.tzinfo) + + def test_get_water_year_trend_tabular(self): + """ + test water year trend tabular download for a past year for Folsom reservoir forecast point + """ + df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] + self.assertEqual(df.shape, (365, 9)) + + def test_get_seasonal_trend_tabular(self): + """ + test seasonal trend tabular download for a past year for Shasta reservoir forecast point + """ + df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] + self.assertEqual(df.shape, (365, 10)) + + def test_get_ensemble_forecast(self): + 
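+        # note: the (721, 43) shape asserted below is consistent with a
+        # roughly 30-day hourly horizon (30 * 24 = 720 ordinates plus the
+        # initial one) and one column per ensemble trace; this is an inference
+        # from the assertions, not a schema documented by CNRFC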
""" + test for current ensemble forecast file schema, using Vernalis forecast location + """ + result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) + self.assertEqual(result['data'].shape, (721, 43)) + self.assertIsNone(result['data'].index.tzinfo) + self.assertEqual(result['info']['watershed'], 'SanJoaquin') + self.assertEqual(result['info']['units'], 'cfs') + + def test_get_ensemble_product_1(self): + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') + + def test_get_ensemble_product_3(self): + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') + + def test_get_ensemble_product_5(self): + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') + + def test_get_ensemble_product_11(self): + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') + + def test_get_ensemble_product_12(self): + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') + + def test_get_ensemble_product_13(self): + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') - def deferred_test_get_ensemble_product_url(self): - result = cnrfc.get_ensemble_product_url(product_id, cnrfc_id, data_format='') + def test_get_data_report_part_8(self): + self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) + + def test_get_monthly_reservoir_storage_summary(self): + self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) + + def test_get_rating_curve(self): + """ + example expected output from get_rating_curve method + """ + result = cnrfc.get_rating_curve('DCSC1') + self.assertEqual(result['data'][0], (0.92, 0.45)) + self.assertEqual(result['data'][-1], (15.0, 16300.0)) + self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') + + def test_get_watershed(self): + """ + example usage for looking up watershed group by forecast point ID + """ + self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento') + + def test_get_forecast_meta_deterministic(self): + """ + test for predicted response with get_forecast_meta_deterministic for Oroville forecast point + """ + result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False) + self.assertIsInstance(result[0], (dt.date, dt.datetime)) + self.assertIsInstance(result[1], (dt.date, dt.datetime)) + self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)') + self.assertEqual(result[3], 'Impaired Inflows') def test_get_ensemble_product_2(self): """ @@ -265,33 +249,173 @@ def test_get_ensemble_product_2(self): self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%', 'CNRFCDeterministic Forecast']) - def deferred_test_get_ensemble_product_6(self): - result = cnrfc.get_ensemble_product_6(cnrfc_id) + def test_get_watershed_forecast_issue_time(self): + # test for the long range ensemble product + self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('daily', + 'North Coast', + date_string=None, + deterministic=False), dt.datetime) + + # test for the hourly deterministic product + self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('hourly', + 'North Coast', + date_string=None, + deterministic=True), dt.datetime) + + # the value None is returned for a specified forecast issuance in the past + self.assertIsNone(cnrfc.get_watershed_forecast_issue_time('daily', + 'North Coast', + date_string='2023010112', + 
deterministic=False))
+
+    def test__default_date_string(self):
+        result = cnrfc.cnrfc._default_date_string(None)
+        result_dt = dt.datetime.strptime(result, '%Y%m%d%H')
+        self.assertIsInstance(result, str)
+        self.assertIsInstance(result_dt, dt.datetime)
+        self.assertIn(result_dt.hour, [0, 6, 12, 18])
+        self.assertEqual(cnrfc.cnrfc._default_date_string('2023112818'), '2023112818')
+        self.assertRaises(ValueError, cnrfc.cnrfc._default_date_string, '2023112805')
+
+    def test_get_ensemble_product_url(self):
+        self.assertEqual(cnrfc.get_ensemble_product_url(1, 'VNSC1', data_format=''),
+                         'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=1')
+        self.assertEqual(cnrfc.get_ensemble_product_url(3, 'VNSC1', data_format=''),
+                         'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=3')
+        self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular')
+                         'https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=SHDC1&prodID=7')
+
+    def test_get_ensemble_product_6(self):
+        """
+        test download and parsing of monthly probability rainbow barchart plot for Shasta location
+        """
+        result = cnrfc.get_ensemble_product_6('SHDC1')
+        self.assertEqual(result['data'].shape, (7, 12))
+        self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%', '75%', '90%', 'Mean', '%Mean'])
+        self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=6')
+        self.assertEqual(result['info']['type'], 'Monthly Streamflow Volume (1000s of Acre-Feet)')
+        self.assertEqual(result['info']['units'], 'TAF')

-    def deferred_test_get_ensemble_product_10(self):
-        result = cnrfc.get_ensemble_product_10(cnrfc_id)
+    def test_get_ensemble_product_10(self):
+        """
+        test download and parsing of water year accumulated volume plot for Shasta location
+        """
+        result = cnrfc.get_ensemble_product_10('SHDC1')
+        self.assertEqual(result['data'].shape, (5, 12))
+        self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%'])
+        self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=10')
+        self.assertEqual(result['info']['type'], 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation')
+        self.assertEqual(result['info']['units'], 'TAF')
+
+    def test__parse_blue_table(self):
+        """
+        test the processing of included data table for monthly summary associated with ensemble products like 2, 10, etc
+        """
+        table_soup = BeautifulSoup(textwrap.dedent("""\
+            <table style="standardTable">
+                <tr>
+                    <td colspan="6"><strong>Title</strong></td>
+                </tr>
+                <tr>
+                    <td>Probability</td>
+                    <td>Nov<br/>29</td>
+                    <td>Nov<br/>30</td>
+                    <td>Dec<br/>01</td>
+                    <td>Dec<br/>02</td>
+                    <td>Dec<br/>03</td>
+                </tr>
+                <tr>
+                    <td>10%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>49.0</td><td>62.5</td>
+                </tr>
+                <tr>
+                    <td>25%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>48.9</td><td>61.4</td>
+                </tr>
+                <tr>
+                    <td>50%<br/>(Median)</td><td>12.2</td><td>24.4</td><td>36.6</td><td>48.9</td><td>61.2</td>
+                </tr>
+            </table>
+
+ """)) + result = cnrfc.cnrfc._parse_blue_table(table_soup) + self.assertIsInstance(result[0], pd.DataFrame) + self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) + self.assertIsInstance(result[1], list) + + def test__apply_conversions(self): + """ + test application of UTC->PST/PDT and kcfs->cfs or kcfs->acre-feet unit conversions for a sample ensemble + """ + df = pd.DataFrame(data=[[0.111, 0.222, 0.333]] * 6, + index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='H')) + + # test for conversion of kcfs -> acre-feet, no timezone handling + result = cnrfc.cnrfc._apply_conversions(df, 'hourly', True, False, False) + self.assertIsInstance(result[0], pd.DataFrame) + self.assertEqual(pd.to_datetime(result[0].first_valid_index()), dt.datetime(2023, 11, 1, 12, tzinfo=None)) + self.assertEqual(round(result[0].loc[result[0].first_valid_index(), 0], 6), 9.173554) + self.assertEqual(result[1], 'acre-feet') + + # reset test frame + df = pd.DataFrame(data=[[0.111, 0.222, 0.333]] * 6, + index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='H')) + + # test for conversion of timezone and kcfs -> cfs + result = cnrfc.cnrfc._apply_conversions(df, 'hourly', False, True, True) + self.assertIsInstance(result[0], pd.DataFrame) + try: + from zoneinfo import ZoneInfo + tz_function = ZoneInfo + except: + from pytz import timezone + tz_function = timezone + self.assertEqual(pd.to_datetime(result[0].first_valid_index()), + dt.datetime(2023, 11, 1, 5, tzinfo=tz_function('America/Los_Angeles'))) + self.assertEqual(result[1], 'cfs') + + def deferred_test_get_ensemble_forecast_watershed(self): + result = cnrfc.get_ensemble_forecast_watershed(watershed, + duration, + date_string, + acre_feet=False, + pdt_convert=False, + as_pdt=False, + cnrfc_id=None) + + def deferred_test_download_watershed_file(self): + result = cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + + def deferred_test_get_ensemble_first_forecast_ordinate(self): + result = cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) def deferred_test_esp_trace_analysis_wrapper(self): result = cnrfc.esp_trace_analysis_wrapper() - def deferred_test__apply_conversions(self): - result = cnrfc._apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt) - def deferred_test__get_cnrfc_restricted_content(self): - result = cnrfc._get_cnrfc_restricted_content(url) + result = cnrfc.cnrfc._get_cnrfc_restricted_content(url) def deferred_test__get_forecast_csv(self): - result = cnrfc._get_forecast_csv(url) + result = cnrfc.cnrfc._get_forecast_csv(url) def deferred_test_get_forecast_csvdata(self): result = cnrfc.get_forecast_csvdata(url) - def deferred_test__default_date_string(self): - result = cnrfc._default_date_string(date_string) - - def deferred_test__parse_blue_table(self): - result = cnrfc._parse_blue_table(table_soup) - if __name__ == '__main__': unittest.main() From f6c4e6e73d889149f702bf151267d6d33269d0fe Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 11:56:00 -0800 Subject: [PATCH 15/36] CNRFC module updates for stability and testing --- collect/cnrfc/cnrfc.py | 315 +++++++++++++++++++++--------------- collect/cnrfc/gages.py | 13 ++ collect/tests/test_cnrfc.py | 161 ++++++++++++++---- collect/utils/utils.py | 9 +- 4 files changed, 341 insertions(+), 157 deletions(-) diff --git a/collect/cnrfc/cnrfc.py b/collect/cnrfc/cnrfc.py index 31f808e..aebf6b0 100644 --- a/collect/cnrfc/cnrfc.py +++ b/collect/cnrfc/cnrfc.py @@ -14,10 +14,8 @@ from 
dotenv import load_dotenv import pandas as pd import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry from collect.cnrfc.gages import * -from collect.utils.utils import clean_fixed_width_headers, get_web_status, get_session_response +from collect.utils import utils try: from zoneinfo import ZoneInfo @@ -35,14 +33,10 @@ # load credentials load_dotenv() -# disable warnings in crontab logs -import urllib3 -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - def get_seasonal_trend_tabular(cnrfc_id, water_year): """ - CNRFC Ensemble Product 7, includes Apr-Jul Forecast 90%, 75%, 50%, 25%, and 10% Exceedance, NWS Apr-Jul Forecast, + CNRFC Ensemble Product 7, includes Apr-Jul Forecast 90%, 75%, 50%, 25%, and 10% Exceedance, NWS Apr-Jul Forecast, Raw Obs Apr-Jul To Date, Raw Avg Apr-Jul To Date, Raw Daily Observation adapted from data accessed in py_water_supply_reporter.py example url: https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=HLEC1&prodID=7&year=2013 @@ -52,12 +46,11 @@ def get_seasonal_trend_tabular(cnrfc_id, water_year): Returns: (dict): data and info """ - url = get_ensemble_product_url(product_id=7, cnrfc_id=cnrfc_id, data_format='Tabular') - url += '&year={0}'.format(water_year) + url += f'&year={water_year}' assert int(water_year) >= 2011, "Ensemble Forecast Product 7 not available before 2011" - + # retrieve from public CNRFC webpage result = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml').find('pre').text.replace('#', '') @@ -65,17 +58,17 @@ def get_seasonal_trend_tabular(cnrfc_id, water_year): with io.StringIO(result) as buf: # parse fixed-width text-formatted table - df = pd.read_fwf(buf, - header=[0, 1, 2, 3, 4], - skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16], + df = pd.read_fwf(buf, + header=[0, 1, 2, 3, 4], + skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16], na_values=['Missing', 'Missing']) # clean columns and fix spelling in source - df.columns = clean_fixed_width_headers(df.columns) - df.rename({x: x.replace('Foreacst', 'Forecast').replace('Foreacast', 'Forecast') + df.columns = utils.clean_fixed_width_headers(df.columns) + df.rename({x: x.replace('Foreacst', 'Forecast').replace('Foreacast', 'Forecast') for x in df.columns}, axis=1, inplace=True) - # clean missing data rows + # clean missing data rows df.dropna(subset=['Date (mm/dd/YYYY)'], inplace=True) df.drop(df.last_valid_index(), axis=0, inplace=True) @@ -90,7 +83,7 @@ def get_seasonal_trend_tabular(cnrfc_id, water_year): if bool(line.strip()): k, v = line.strip().split(': ') summary.update({k: v.strip()}) - + return {'data': df, 'info': {'url': url, 'type': 'Seasonal Trend Tabular (Apr-Jul)', 'title': notes[0], @@ -101,7 +94,7 @@ def get_seasonal_trend_tabular(cnrfc_id, water_year): def get_water_year_trend_tabular(cnrfc_id, water_year): """ - CNRFC Ensemble Product 9, which includes WY Forecast 90% Exceedance, 75% Exceedance, 50% Exceedance, 25% Exceedance, + CNRFC Ensemble Product 9, which includes WY Forecast 90% Exceedance, 75% Exceedance, 50% Exceedance, 25% Exceedance, 10% Exceedance, Raw WY To Date Observation, Raw WY To Date Average, Raw Daily Observation #example url: https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=FOLC1&prodID=9&year=2022# Arguments: @@ -115,7 +108,7 @@ def get_water_year_trend_tabular(cnrfc_id, water_year): url += '&year={0}'.format(water_year) assert int(water_year) >= 2013, "Ensemble Forecast Product 9 not available before 2013" - + # retrieve from public CNRFC webpage result = 
BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml').find('pre').text.replace('#', '') @@ -123,14 +116,14 @@ def get_water_year_trend_tabular(cnrfc_id, water_year): with io.StringIO(result) as buf: # parse fixed-width text-formatted table - df = pd.read_fwf(buf, - header=[0, 1, 2, 3], - skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17], + df = pd.read_fwf(buf, + header=[0, 1, 2, 3], + skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17], na_values=['Missing', 'Missing']) # clean columns and fix spelling in source - df.columns = clean_fixed_width_headers(df.columns) - df.rename({x: x.replace('Foreacst', 'Forecast').replace('Foreacast', 'Forecast') + df.columns = utils.clean_fixed_width_headers(df.columns) + df.rename({x: x.replace('Foreacst', 'Forecast').replace('Foreacast', 'Forecast') for x in df.columns}, axis=1, inplace=True) # clean missing data rows @@ -148,7 +141,7 @@ def get_water_year_trend_tabular(cnrfc_id, water_year): if bool(line.strip()): k, v = line.strip().split(': ') summary.update({k: v.strip()}) - + return {'data': df, 'info': {'url': url, 'type': 'Water Year Trend Tabular', 'title': notes[0], @@ -166,7 +159,7 @@ def get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=Fals convert CSV data to DataFrame, separating historical from forecast inflow series Note: as of March 2022, deterministic forecasts retrieved with the graphicalRVF or - graphicalRelease URLs return CSVs of 3 different formats with headers that + graphicalRelease URLs return CSVs of 3 different formats with headers that may also include stage information Arguments: @@ -183,9 +176,9 @@ def get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=Fals # default deterministic URL and index name url = 'https://www.cnrfc.noaa.gov/graphical{0}_csv.php?id={1}'.format(forecast_type, cnrfc_id) date_column_header = 'Valid Date/Time (Pacific)' - specified_dtypes = {date_column_header: str, + specified_dtypes = {date_column_header: str, 'Stage (Feet)': float, - f'{flow_prefix}Flow (CFS)': float, + f'{flow_prefix}Flow (CFS)': float, 'Trend': str, 'Issuance Date/Time (Pacific)': str, 'Threshold Exceedance Status': str, @@ -195,16 +188,16 @@ def get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=Fals if cnrfc_id in RESTRICTED: url = 'https://www.cnrfc.noaa.gov/restricted/graphical{0}_csv.php?id={1}'.format(forecast_type, cnrfc_id) date_column_header = 'Date/Time (Pacific Time)' - specified_dtypes = {date_column_header: str, - f'{flow_prefix}Flow (CFS)': float, + specified_dtypes = {date_column_header: str, + f'{flow_prefix}Flow (CFS)': float, 'Trend': str} # get forecast file from csv url csvdata = _get_forecast_csv(url) # read historical and forecast series from CSV - df = pd.read_csv(csvdata, - header=0, + df = pd.read_csv(csvdata, + header=0, parse_dates=True, float_precision='high', dtype=specified_dtypes) @@ -237,7 +230,7 @@ def get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=Fals return {'data': df, 'info': {'url': url, 'type': f'Deterministic {flow_prefix}Forecast', 'title': title, - 'plot_type': plot_type, + 'plot_type': plot_type, 'first_ordinate': first_ordinate.strftime('%Y-%m-%d %H:%M'), 'issue_time': time_issued.strftime('%Y-%m-%d %H:%M'), 'next_issue': next_issue_time.strftime('%Y-%m-%d %H:%M'), @@ -272,13 +265,13 @@ def get_deterministic_forecast_watershed(watershed, date_string, acre_feet=False url = 'https://www.cnrfc.noaa.gov/csv/{0}_{1}_csv_export.zip'.format(date_string, watershed) # extract CSV from zip 
object - if get_web_status(url): + if utils.get_web_status(url): try: csvdata = _get_forecast_csv(url) except zipfile.BadZipFile: print(f'ERROR: forecast for {date_string} has not yet been issued.') raise zipfile.BadZipFile - + # raise error if user supplied an actual date string but that forecast doesn't exist elif _date_string is not None: print(f'ERROR: forecast for {date_string} has not yet been issued.') @@ -287,17 +280,17 @@ def get_deterministic_forecast_watershed(watershed, date_string, acre_feet=False # try previous forecast until a valid file is found else: stamp = dt.datetime.strptime(date_string, '%Y%m%d%H') - while not get_web_status(url): + while not utils.get_web_status(url): stamp -= dt.timedelta(hours=6) url = 'https://www.cnrfc.noaa.gov/csv/{0:%Y%m%d%H}_{1}_csv_export.zip'.format(stamp, watershed) date_string = stamp.strftime('%Y%m%d%H') csvdata = _get_forecast_csv(url) # parse forecast data from CSV - df = pd.read_csv(csvdata, - header=0, - skiprows=[1,], - parse_dates=True, + df = pd.read_csv(csvdata, + header=0, + skiprows=[1,], + parse_dates=True, index_col=0, float_precision='high', dtype={'GMT': str}) @@ -315,10 +308,10 @@ def get_deterministic_forecast_watershed(watershed, date_string, acre_feet=False # forecast issue time time_issued = get_watershed_forecast_issue_time('hourly', watershed, date_string, deterministic=True) - return {'data': df, 'info': {'url': url, - 'type': 'Deterministic Forecast', + return {'data': df, 'info': {'url': url, + 'type': 'Deterministic Forecast', 'issue_time': time_issued.strftime('%Y-%m-%d %H:%M') if time_issued is not None else time_issued, - 'watershed': watershed, + 'watershed': watershed, 'units': units, 'downloaded': dt.datetime.now().strftime('%Y-%m-%d %H:%M')}} @@ -451,13 +444,13 @@ def get_ensemble_forecast_watershed(watershed, duration, date_string, acre_feet= url = 'https://www.cnrfc.noaa.gov/csv/{0}_{1}_hefs_csv_{2}.zip'.format(date_string, watershed, duration) # extract CSV from zip object - if get_web_status(url): + if utils.get_web_status(url): try: csvdata = _get_forecast_csv(url) except zipfile.BadZipFile: print(f'ERROR: forecast for {date_string} has not yet been issued.') raise zipfile.BadZipFile - + # raise error if user supplied an actual date string but that forecast doesn't exist elif _date_string is not None: print(f'ERROR: forecast for {date_string} has not yet been issued.') @@ -466,17 +459,17 @@ def get_ensemble_forecast_watershed(watershed, duration, date_string, acre_feet= # try previous forecast until a valid file is found else: stamp = dt.datetime.strptime(date_string, '%Y%m%d%H') - while not get_web_status(url): + while not utils.get_web_status(url): stamp -= dt.timedelta(hours=6) url = 'https://www.cnrfc.noaa.gov/csv/{0:%Y%m%d%H}_{1}_hefs_csv_{2}.zip'.format(stamp, watershed, duration) date_string = stamp.strftime('%Y%m%d%H') csvdata = _get_forecast_csv(url) # parse forecast data from CSV - df = pd.read_csv(csvdata, - header=0, - skiprows=[1,], - parse_dates=True, + df = pd.read_csv(csvdata, + header=0, + skiprows=[1,], + parse_dates=True, index_col=0, float_precision='high', dtype={'GMT': str}) @@ -484,26 +477,26 @@ def get_ensemble_forecast_watershed(watershed, duration, date_string, acre_feet= # filter watershed for single forecast point ensemble, if provided if cnrfc_id is not None: df = df.filter(regex=r'^{0}((\.\d+)?)$'.format(cnrfc_id)) - + # convert kcfs to cfs; optional timezone conversions and optional conversion to acre-feet df, units = _apply_conversions(df, duration, acre_feet, 
pdt_convert, as_pdt) # get date/time stamp from ensemble download page time_issued = get_watershed_forecast_issue_time(duration, watershed, date_string) - + return {'data': df, 'info': {'url': url, 'watershed': watershed, 'issue_time': time_issued.strftime('%Y-%m-%d %H:%M') if time_issued is not None else time_issued, 'first_ordinate': get_ensemble_first_forecast_ordinate(df=df).strftime('%Y-%m-%d %H:%M'), - 'units': units, + 'units': units, 'duration': duration, 'downloaded': dt.datetime.now().strftime('%Y-%m-%d %H:%M')}} def download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None, return_content=False): """ - download short range ensemble, deterministic forecast, and seasonal outlook for the - watershed as zipped file, unzip, save as csv to path + download short range ensemble, deterministic forecast, and seasonal outlook for the watershed as zipped file, unzip, + save as csv to path Arguments: watershed (str): the forecast group identifier @@ -536,7 +529,7 @@ def download_watershed_file(watershed, date_string, forecast_type, duration=None url = 'https://www.cnrfc.noaa.gov/csv/' + url_end # extract CSV from zip object - if get_web_status(url): + if utils.get_web_status(url): try: csvdata = _get_forecast_csv(url) except zipfile.BadZipFile: @@ -584,7 +577,7 @@ def parse_forecast_archive_table(url): Returns: df (pandas.DataFrame): dataframe containing HTML table summarizing last forecast issuances for product page """ - df = pd.read_html(get_session_response(url).text)[0] + df = pd.read_html(utils.get_session_response(url).text)[0] # extract the header row and assign as column names df.columns = df.iloc[1,:] @@ -634,7 +627,8 @@ def get_watershed_forecast_issue_time(duration, watershed, date_string=None, det # extract last-modified details and filenames from forecast product zipfile table table = parse_forecast_archive_table(url) - return parser.parse(table.loc[table['Forecast Group']==watershed, 'Date/Time Last Modified'].values[0]) + return parser.parse(table.loc[table['Forecast Group']==get_watershed_formatted(watershed), + 'Date/Time Last Modified'].values[0]) def get_watershed(cnrfc_id): @@ -664,6 +658,30 @@ def get_watershed(cnrfc_id): raise ValueError('cnrfc_id not recognized.') +def get_watershed_formatted(watershed): + """ + get associated hydrologic region for CNRFC forecast location + """ + return {'klamath': 'Klamath', + 'NorthCoast': 'North Coast', + 'RussianNapa': 'Russian/Napa', + 'UpperSacramento': 'Upper Sacramento', + 'FeatherYuba': 'Feather/Yuba', + 'CachePutah': 'Cache/Putah', + 'american': 'American', + 'LowerSacramento': 'Lower Sacramento', + 'CentralCoast': 'Central Coast', + 'SouthernCalifornia': 'Southern California', + 'Tulare': 'Tulare', + 'SanJoaquin': 'San Joaquin', + 'N_SanJoaquin': 'North San Joaquin', + 'EastSierra': 'East Sierra', + 'Humboldt': 'Humboldt', + 'SalinasPajaro': 'Salinas/Pajaro', + 'SouthBay': 'South Bay', + 'SanDiego_Inland': 'San Diego/Inland'}.get(watershed, watershed) + + def get_ensemble_first_forecast_ordinate(url=None, df=None): """ return the first date of the forecast (GMT) as datetime object @@ -694,10 +712,8 @@ def get_ensemble_product_1(cnrfc_id): raise NotImplementedError url = get_ensemble_product_url(1, cnrfc_id) - get_web_status(url) - return {'data': None, 'info': {'url': url, - 'type': '10-Day Probability Plot', - 'units': 'TAF'}} + utils.get_web_status(url) + return {'data': None, 'info': {'url': url, 'type': '10-Day Probability Plot', 'units': 'TAF'}} def get_ensemble_product_2(cnrfc_id): @@ 
-707,14 +723,13 @@ def get_ensemble_product_2(cnrfc_id): (alt text source: https://www.cnrfc.noaa.gov/awipsProducts/RNOWRK10D.php) """ url = get_ensemble_product_url(2, cnrfc_id) - get_web_status(url) + utils.get_web_status(url) # request Ensemble Product 2 page content soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml') data_table = soup.find_all('table', {'style': 'standardTable'})[0] # parse Tabular 10-Day Streamflow Volume Accumulation (1000s of Acre-Feet) from table - print(data_table) df, notes = _parse_blue_table(data_table) df.set_index('Probability', inplace=True) @@ -723,7 +738,7 @@ def get_ensemble_product_2(cnrfc_id): title, time_issued = str(td.find('strong')).split('
<br/>')
         time_issued = time_issued.rstrip('</strong>').lstrip('Data Updated: ')
 
-    return {'data': df, 'info': {'url': url, 
+    return {'data': df, 'info': {'url': url,
                                  'type': 'Tabular 10-Day Streamflow Volume Accumulation',
                                  'issue_time': time_issued,
                                  'units': 'TAF',
@@ -736,10 +751,8 @@ def get_ensemble_product_3(cnrfc_id):
     raise NotImplementedError
 
     url = get_ensemble_product_url(3, cnrfc_id)
-    get_web_status(url)
-    return {'data': None, 'info': {'url': url,
-                                   'type': '5-Day Peaks Plot',
-                                   'units': 'TAF'}}
+    utils.get_web_status(url)
+    return {'data': None, 'info': {'url': url, 'type': '5-Day Peaks Plot','units': 'TAF'}}
 
 
 def get_ensemble_product_5(cnrfc_id):
@@ -748,10 +761,8 @@ def get_ensemble_product_5(cnrfc_id):
     raise NotImplementedError
 
     url = get_ensemble_product_url(5, cnrfc_id)
-    get_web_status(url)
-    return {'data': None, 'info': {'url': url,
-                                   'type': 'Tabular 5-Day Volume Accumulations',
-                                   'units': 'TAF'}}
+    utils.get_web_status(url)
+    return {'data': None, 'info': {'url': url, 'type': 'Tabular 5-Day Volume Accumulations', 'units': 'TAF'}}
 
 
 def get_ensemble_product_6(cnrfc_id):
@@ -760,7 +771,7 @@ def get_ensemble_product_6(cnrfc_id):
     the ensemble product page
     """
     url = get_ensemble_product_url(6, cnrfc_id)
-    get_web_status(url)
+    utils.get_web_status(url)
 
     # request Ensemble Product 6 page content
     soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml')
@@ -776,7 +787,7 @@ def get_ensemble_product_6(cnrfc_id):
         time_issued = time_issued.rstrip('</strong>').lstrip('Data Updated: ')
         title = title.lstrip('<strong>')
 
-    return {'data': df, 'info': {'url': url, 
+    return {'data': df, 'info': {'url': url,
                                  'type': title,
                                  'issue_time': time_issued,
                                  'units': 'TAF',
@@ -792,7 +803,7 @@ def get_ensemble_product_10(cnrfc_id):
     @narlesky TO DO - recreate graphic
     """
     url = get_ensemble_product_url(10, cnrfc_id)
-    get_web_status(url)
+    utils.get_web_status(url)
 
     # request Ensemble Product 10 page content
     soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml')
@@ -802,8 +813,8 @@ def get_ensemble_product_10(cnrfc_id):
     df, notes = _parse_blue_table(data_table)
     df.set_index('Probability', inplace=True)
 
-    return {'data': df, 'info': {'url': url, 
-                                 'note': '@narlesky TO DO - recreate graphic', 
+    return {'data': df, 'info': {'url': url,
+                                 'note': '@narlesky TO DO - recreate graphic',
                                  'type': 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation',
                                  'units': 'TAF',
                                  'downloaded': dt.datetime.now().strftime('%Y-%m-%d %H:%M')}}
@@ -817,8 +828,8 @@ def get_ensemble_product_11(cnrfc_id):
     raise NotImplementedError
 
     url = get_ensemble_product_url(11, cnrfc_id)
-    get_web_status(url)
-    return {'data': None, 'info': {'url': url,
+    utils.get_web_status(url)
+    return {'data': None, 'info': {'url': url,
                                    'type': 'Multi-Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation',
                                    'units': 'TAF'}}
@@ -829,8 +840,8 @@ def get_ensemble_product_12(cnrfc_id):
     raise NotImplementedError
 
     url = get_ensemble_product_url(12, cnrfc_id)
-    get_web_status(url)
-    return {'data': None, 'info': {'url': url,
+    utils.get_web_status(url)
+    return {'data': None, 'info': {'url': url,
                                    'type': 'Historical Flows (Water Year & Seasonal (Apr-Jul)',
                                    'units': 'TAF'}}
@@ -841,10 +852,8 @@ def get_ensemble_product_13(cnrfc_id):
     raise NotImplementedError
 
     url = get_ensemble_product_url(13, cnrfc_id)
-    get_web_status(url)
-    return {'data': None, 'info': {'url': url,
-                                   'type': 'Water Resources Verification',
-                                   'units': 'TAF'}}
+    utils.get_web_status(url)
+    return {'data': None, 'info': {'url': url, 'type': 'Water Resources Verification', 'units':
'TAF'}} def get_data_report_part_8(): @@ -854,8 +863,8 @@ def get_data_report_part_8(): raise NotImplementedError url = 'https://www.wrh.noaa.gov/cnrfc/rsa_getprod.php?prod=RNORR8RSA&wfo=cnrfc&version=0' - get_web_status(url) - return {'data': None, 'info': {'url': url, + utils.get_web_status(url) + return {'data': None, 'info': {'url': url, 'type': 'Hydrology-meteorology Data Report Part 8', 'units': 'TAF'}} @@ -864,16 +873,96 @@ def get_monthly_reservoir_storage_summary(): raise NotImplementedError url = 'https://www.cnrfc.noaa.gov/awipsProducts/RNORR6RSA.php' - get_web_status(url) - return {'data': None, 'info': {'url': url, + utils.get_web_status(url) + return {'data': None, 'info': {'url': url, 'type': 'CNRFC Monthly Reservoir Storage Summary', 'units': 'TAF'}} -def esp_trace_analysis_wrapper(): +def get_esp_trace_analysis_url(cnrfc_id, + interval='day', + value_type='mean', + plot_type='traces', + table_type='forecastInfo', + product_type='table', + date_string=None, + start_date_string=None, + end_date_string=None): """ + https://www.cnrfc.noaa.gov/esp_trace_analysis.php describes the menu of options for building an ensemble forecast + product from the following options + 1. Select an HEFS Trace Location + 2. Select an Accumulation Type + 3. Select an Interval + 4. Select a Distribution Type + 5. Select a Starting Date + 6. Select an Ending Date + 7. Select a Plot Option and Generate + 8. Select a Table Option and Generate + The base url for the user interface to do so is at https://www.cnrfc.noaa.gov/ensembleProduct.php + + Arguments: + cnrfc_id (str): HEFS trace location + interval (str): horizon for the product + value_type (str): accumulation type to apply to the traces + plot_type (str): plot option + table_type (str): table option + product_type (str): product format option + date_string (str): optional forecast date as a string in format YYYYMMDD; defaults to most recent forecast date + start_date_string (str): optional analysis start date formatted as YYYYMMDD + end_date_string (None, str): optional analysis end date formatted as YYYYMMDD + Returns: + url (str): the string url for the product + Raises: + ValueError """ - url = 'https://www.cnrfc.noaa.gov/esp_trace_analysis.php' + url = 'https://www.cnrfc.noaa.gov/ensembleProduct.php?' 
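+    # illustrative note: with the defaults plus date_string='20231106' and
+    # end_date_string='20231231' for cnrfc_id='BTYO3', the query assembled
+    # below yields (as asserted in test_get_esp_trace_analysis_url):
+    # https://www.cnrfc.noaa.gov/ensembleProduct.php?id=BTYO3&prodID=8&interval=day
+    #     &valueType=mean&plotType=traces&tableType=forecastInfo&productType=table
+    #     &dateSelection=custom&date=20231106&endDate=20231231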
+ + # url query parameters + query_args = [f'id={cnrfc_id}', + 'prodID=8', # for "build your own" + f'interval={interval}', + f'valueType={value_type}', + f'plotType={plot_type}', + f'tableType={table_type}', + f'productType={product_type}'] + + if any([x is not None for x in [date_string, start_date_string, end_date_string]]): + query_args.append('dateSelection=custom') + + if date_string is not None: + if len(date_string) != 8: + raise ValueError(f'invalid `date_string`: {date_string}') + query_args.append(f'date={date_string}') + + if start_date_string is not None: + if len(start_date_string) != 8: + raise ValueError(f'invalid `start_date_string`: {start_date_string}') + query_args.append(f'endDate={start_date_string}') + + if end_date_string is not None: + if len(end_date_string) != 8: + raise ValueError(f'invalid `end_date_string`: {end_date_string}') + query_args.append(f'endDate={end_date_string}') + + if interval not in ['day', 'week', 'month', 'period']: + raise ValueError(f'invalid `interval`: {interval}') + + if value_type not in ['mean', 'min', 'max', 'sum']: + raise ValueError(f'invalid `value_type`: {value_type}') + + if plot_type not in ['traces', 'probability', 'expectedValue', 'exceedance']: + raise ValueError(f'invalid `plot_type`: {plot_type}') + + if table_type not in ['forecastInfo', 'quantiles']: + raise ValueError(f'invalid `table_type`: {table_type}') + + if product_type not in ['table', 'plot']: + raise ValueError(f'invalid `product_type`: {product_type}') + + # construct the URL + url += '&'.join(query_args) + return url def _apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt): @@ -905,7 +994,7 @@ def _get_cnrfc_restricted_content(url): request page from CNRFC restricted site """ basic_auth = requests.auth.HTTPBasicAuth(os.getenv('CNRFC_USER'), os.getenv('CNRFC_PASSWORD')) - content = requests.get(url, auth=basic_auth).content + content = utils.get_session_response(url, auth=basic_auth).content return content @@ -929,12 +1018,7 @@ def _get_forecast_csv(url): basic_auth = requests.auth.HTTPBasicAuth(os.getenv('CNRFC_USER'), os.getenv('CNRFC_PASSWORD')) # initialize requests session with retries - session = requests.Session() - retries = Retry(total=5, - backoff_factor=0.1, - status_forcelist=[500, 502, 503, 504]) - session.mount('https://', HTTPAdapter(max_retries=retries)) - content = session.get(url, auth=basic_auth, verify=False).content + content = utils.get_session_response(url, auth=basic_auth).content # handle zipfiles if '.zip' in filename: @@ -964,7 +1048,7 @@ def get_forecast_csvdata(url): def get_rating_curve(cnrfc_id): """ returns paired flow and stage data parsed from the text of CNRFC rating curve JavaScript files - + Arguments: cnrfc_id (str): forecast point (such as FOLC1) Returns: @@ -972,7 +1056,7 @@ def get_rating_curve(cnrfc_id): """ # retrieve data from URL url = f'https://www.cnrfc.noaa.gov/data/ratings/{cnrfc_id}_rating.js' - response = requests.get(url) + response = utils.get_session_response(url) # check if data exists if response.status_code == 200: @@ -993,13 +1077,10 @@ def get_rating_curve(cnrfc_id): data = list(zip(stage_data, flow_data)) else: - print(f'Error accessing rating curve URL for: {cnrfc_id}') + print(f'ERROR: Error accessing rating curve URL for: {cnrfc_id}') data = None - return {'data': data, - 'info': {'url': url, - 'cnrfc_id': cnrfc_id} - } + return {'data': data, 'info': {'url': url, 'cnrfc_id': cnrfc_id}} def _default_date_string(date_string): @@ -1058,23 +1139,3 @@ def _parse_blue_table(table_soup): # 
format as dataframe df = pd.DataFrame(rows, columns=columns).replace({'--': float('nan')}) return df, notes - - -if __name__ == '__main__': - - # Folsom | FOLC1 - # New Bullards Bar | NBBC1 - # Oroville | ORDC1 - # Pine Flat | PNFC1 - # Shasta | SHDC1 - - # print(get_ensemble_product_1('FOLC1')) - # print(get_deterministic_forecast('SHDC1', truncate_historical=False)['data'].head()) - # print(get_ensemble_forecast('SHDC1', 'h')['data'].head()) - # print(get_deterministic_forecast_watershed('UpperSacramento', None)['data'].head()) - # print(get_ensemble_forecast_watershed('UpperSacramento', 'hourly', None)['data'].head()) - # print(get_seasonal_trend_tabular('SHDC1', 2018)['data'].head()) - - - # print(get_ensemble_product_2('ORDC1')) - get_ensemble_product_6('ORDC1') diff --git a/collect/cnrfc/gages.py b/collect/cnrfc/gages.py index 1d6cac9..f121fd6 100644 --- a/collect/cnrfc/gages.py +++ b/collect/cnrfc/gages.py @@ -42,9 +42,16 @@ 'RRGC1L', 'RRGC1F', 'RUFC1L', 'SVCC1F', 'SVCC1L', 'HLLC1L', 'HLLC1SPL'] +# Oct 2022 onwards +SALINASPAJARO_GAGES = [] + +# Oct 2022 onwards +SOUTHBAY_GAGES = [] + LOWERSACRAMENTO_GAGES = ['SAMC1', 'SACC1', 'VONC1', 'FMWC1', 'DRMC1', 'RCVC1', 'FMWC1L', 'SACC1L', 'SAMC1L', 'NCOC1', 'NCOC1L'] +# before Oct 2022 CENTRALCOAST_GAGES = ['LWDC1', 'SNRC1', 'NBYC1', 'NACC1', 'PRBC1', 'RDRC1', 'BSRC1', 'PIIC1', 'TESC1', 'HOSC1', 'PHOC1', 'AROC1', 'BTEC1', 'COYC1', 'ANDC1', 'CYTC1', 'CYEC1', 'CMIC1', @@ -60,6 +67,9 @@ 'MVDC1O', 'MVVC1', 'HAWC1', 'ELPC1', 'SVIC1', 'FSNC1', 'CYBC1', 'HSAC1'] +# Oct 2022 onwards +SANDIEGO_INLAND_GAGES = [] + TULARE_GAGES = ['KKVC1', 'SKRC1', 'ISAC1', 'SCSC1', 'KTRC1', 'TMDC1', 'PFTC1', 'DLMC1', 'MLPC1'] @@ -82,6 +92,9 @@ 'ROCN2', 'HBMN2', 'CMSN2', 'HRIN2', 'LHPN2', 'MARN2', 'MDCN2'] +WATER_SUPPLY_INDICES = ['SACC0', 'VNSC0', 'MLIC0'] + + ALL = sorted(set(KLAMATH_GAGES + NORTHCOAST_GAGES + RUSSIANNAPA_GAGES + diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index 62de9ed..b08bc38 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -17,9 +17,13 @@ from collect import cnrfc - -def mocked_strftime(*args, **kwargs): - return '2023110112' +# alternate timezone representation depending on Python version +try: + from zoneinfo import ZoneInfo + tz_function = ZoneInfo +except: + from pytz import timezone + tz_function = timezone class TestCNRFC(unittest.TestCase): @@ -190,27 +194,51 @@ def test_get_ensemble_forecast(self): self.assertEqual(result['info']['units'], 'cfs') def test_get_ensemble_product_1(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') def test_get_ensemble_product_3(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') def test_get_ensemble_product_5(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') def test_get_ensemble_product_11(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') def test_get_ensemble_product_12(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ 
self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') def test_get_ensemble_product_13(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') def test_get_data_report_part_8(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) def test_get_monthly_reservoir_storage_summary(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) def test_get_rating_curve(self): @@ -282,7 +310,7 @@ def test_get_ensemble_product_url(self): 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=1') self.assertEqual(cnrfc.get_ensemble_product_url(3, 'VNSC1', data_format=''), 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=3') - self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular') + self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular'), 'https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=SHDC1&prodID=7') def test_get_ensemble_product_6(self): @@ -352,7 +380,7 @@ def test__parse_blue_table(self): 61.2 - """)) + """), 'lxml') result = cnrfc.cnrfc._parse_blue_table(table_soup) self.assertIsInstance(result[0], pd.DataFrame) self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) @@ -364,7 +392,7 @@ def test__apply_conversions(self): """ df = pd.DataFrame(data=[[0.111, 0.222, 0.333]] * 6, index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='H')) - + # test for conversion of kcfs -> acre-feet, no timezone handling result = cnrfc.cnrfc._apply_conversions(df, 'hourly', True, False, False) self.assertIsInstance(result[0], pd.DataFrame) @@ -379,42 +407,119 @@ def test__apply_conversions(self): # test for conversion of timezone and kcfs -> cfs result = cnrfc.cnrfc._apply_conversions(df, 'hourly', False, True, True) self.assertIsInstance(result[0], pd.DataFrame) - try: - from zoneinfo import ZoneInfo - tz_function = ZoneInfo - except: - from pytz import timezone - tz_function = timezone self.assertEqual(pd.to_datetime(result[0].first_valid_index()), dt.datetime(2023, 11, 1, 5, tzinfo=tz_function('America/Los_Angeles'))) self.assertEqual(result[1], 'cfs') - def deferred_test_get_ensemble_forecast_watershed(self): - result = cnrfc.get_ensemble_forecast_watershed(watershed, - duration, - date_string, + def test_get_ensemble_forecast_watershed(self): + """ + test for retrieiving an ensemble forecast watershed file for a forecast issuance prior to most recent + """ + result = cnrfc.get_ensemble_forecast_watershed('SalinasPajaro', + 'hourly', + '2023010118', acre_feet=False, pdt_convert=False, as_pdt=False, cnrfc_id=None) + self.assertEqual(result['data'].shape, (721, 924)) + self.assertEqual(result['data'].tail(1)['BTEC1'].values[0], 226.94) + self.assertEqual(pd.to_datetime(result['data'].last_valid_index()), dt.datetime(2023, 1, 31, 18, 0, 0)) + self.assertEqual(result['info']['watershed'], 'SalinasPajaro') + self.assertEqual(result['info']['url'], + 'https://www.cnrfc.noaa.gov/csv/2023010118_SalinasPajaro_hefs_csv_hourly.zip') + self.assertIsNone(result['info']['issue_time']) + + def test_get_esp_trace_analysis_url(self): + """ + test that the build-your-own trace analysis product 
url is properly constructed for the provided options + """ + url = cnrfc.get_esp_trace_analysis_url('BTYO3', + interval='day', + value_type='mean', + plot_type='traces', + table_type='forecastInfo', + product_type='table', + date_string='20231106', + end_date_string='20231231') + expected_url = '&'.join(['https://www.cnrfc.noaa.gov/ensembleProduct.php?id=BTYO3', + 'prodID=8', + 'interval=day', + 'valueType=mean', + 'plotType=traces', + 'tableType=forecastInfo', + 'productType=table', + 'dateSelection=custom', + 'date=20231106', + 'endDate=20231231']) + self.maxDiff = 800 + self.assertEqual(url, expected_url) + + def test_get_ensemble_first_forecast_ordinate(self): + """ + test that the first ensemble forecast ordinate is a datetime in the past + """ + result = cnrfc.get_ensemble_first_forecast_ordinate( + url='https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv', + df=None + ) + self.assertIsInstance(result, dt.datetime) + result_utc = result.replace(tzinfo=tz_function('UTC')) + self.assertLess(result_utc, dt.datetime.now(tz=tz_function('UTC'))) + + def test__get_forecast_csv(self): + """ + test for forecast CSV data retrieval to in-memory filelike object (private method) + """ + result = cnrfc.cnrfc._get_forecast_csv('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') + self.assertIsInstance(result, io.BytesIO) - def deferred_test_download_watershed_file(self): - result = cnrfc.download_watershed_file(watershed, date_string, forecast_type, duration=None, path=None) + # check first line contains forecast point headers + self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) - def deferred_test_get_ensemble_first_forecast_ordinate(self): - result = cnrfc.get_ensemble_first_forecast_ordinate(url=None, df=None) + # check second line contains variables identifiers + self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - def deferred_test_esp_trace_analysis_wrapper(self): - result = cnrfc.esp_trace_analysis_wrapper() + # check third line contains expected timeseries info + self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + def test_get_forecast_csvdata(self): + """ + test for forecast CSV data retrieval to in-memory filelike object (public method); duplicate of + test__get_forecast_csv + """ + result = cnrfc.get_forecast_csvdata('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') + self.assertIsInstance(result, io.BytesIO) + self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) + self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) + self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + def test__get_cnrfc_restricted_content(self): + """ + test that restricted content can be accessed through the provided credentials + """ + result = cnrfc.cnrfc._get_cnrfc_restricted_content( + 'https://www.cnrfc.noaa.gov/restricted/graphicalRVF_tabular.php?id=FOLC1' + ) + sample = BeautifulSoup(result, 'lxml').find('pre').text.splitlines()[:9] + self.assertEqual(sample[2], '# Location: American River - Folsom Lake (FOLC1)') + self.assertTrue(sample[-1].startswith('# Maximum Observed Flow:')) + + def test_download_watershed_file(self): + """ + test for downloading watershed file to local file system (in this case, downloaded to in-memory object) + """ + result = cnrfc.download_watershed_file('WSI', '2023010112', 'ensemble', duration='daily', path=io.BytesIO()) + self.assertIsInstance(result, io.BytesIO) - def 
deferred_test__get_cnrfc_restricted_content(self): - result = cnrfc.cnrfc._get_cnrfc_restricted_content(url) + # check first line contains forecast point headers + self.assertTrue(result.readline().decode('utf-8').startswith('GMT,SACC0')) - def deferred_test__get_forecast_csv(self): - result = cnrfc.cnrfc._get_forecast_csv(url) + # check second line contains variables identifiers + self.assertTrue(result.readline().decode('utf-8').startswith(',SQME,SQME')) - def deferred_test_get_forecast_csvdata(self): - result = cnrfc.get_forecast_csvdata(url) + # check third line contains expected timeseries info + self.assertTrue(result.readline().decode('utf-8').startswith('2023-01-01 12:00:00,252.83904,')) if __name__ == '__main__': diff --git a/collect/utils/utils.py b/collect/utils/utils.py index 29ef8a9..f44e033 100644 --- a/collect/utils/utils.py +++ b/collect/utils/utils.py @@ -4,8 +4,13 @@ The utilities module of MBK Engineers' collect project """ # -*- coding: utf-8 -*- +import urllib3 +# disable warnings in crontab logs +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + import urllib3.contrib.pyopenssl urllib3.contrib.pyopenssl.inject_into_urllib3() + import ssl import requests @@ -13,7 +18,7 @@ from requests.adapters import HTTPAdapter -def get_session_response(url): +def get_session_response(url, auth=None): """ Arguments: url (str): valid web URL @@ -25,7 +30,7 @@ def get_session_response(url): backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) session.mount('https://', HTTPAdapter(max_retries=retries)) - return session.get(url, verify=ssl.CERT_NONE) + return session.get(url, auth=auth, verify=ssl.CERT_NONE) def get_web_status(url): From 266aa15371e30f39140a11e673cdec90c132f293 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 12:18:11 -0800 Subject: [PATCH 16/36] Update test docstrings --- collect/tests/test_alert.py | 4 +-- collect/tests/test_cnrfc.py | 4 +-- collect/tests/test_cvo.py | 4 +-- collect/tests/test_dwr.py | 72 ++++++++++++++++++------------------- collect/tests/test_nid.py | 4 +-- collect/tests/test_usace.py | 4 +-- collect/tests/test_usgs.py | 4 +-- collect/tests/test_utils.py | 4 +-- 8 files changed, 50 insertions(+), 50 deletions(-) diff --git a/collect/tests/test_alert.py b/collect/tests/test_alert.py index 758e499..d054eed 100644 --- a/collect/tests/test_alert.py +++ b/collect/tests/test_alert.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_alert ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.alert data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index b08bc38..c9d233f 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_cnrfc ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.cnrfc data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt diff --git a/collect/tests/test_cvo.py b/collect/tests/test_cvo.py index 43c0d86..73bf9cb 100644 --- a/collect/tests/test_cvo.py +++ 
b/collect/tests/test_cvo.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_cvo ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.cvo data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index 5342a5c..4e26b4d 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_dwr ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.dwr data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt @@ -29,7 +29,7 @@ class TestCASGEM(unittest.TestCase): dwr.casgem module references inactive API; CASGEM tools must be updated once CNRA completes web transition """ - def test_get_casgem_data(self): + def deferred_test_get_casgem_data(self): return casgem_id_result = casgem.get_casgem_data( @@ -69,109 +69,109 @@ class TestCAWDL(unittest.TestCase): """ dwr.cawdl module references inactive API; CAWDL tools must be updated once CNRA/DWR completes web transition """ - def test_get_cawdl_data(self): + def deferred_test_get_cawdl_data(self): cawdl.get_cawdl_data('17202') - def test_get_cawdl_surface_water_data(self): + def deferred_test_get_cawdl_surface_water_data(self): cawdl.get_cawdl_surface_water_data('17202', 2021, 'FLOW', interval='DAILY_MEAN') - def test_get_cawdl_surface_water_por(self): + def deferred_test_get_cawdl_surface_water_por(self): cawdl.get_cawdl_surface_water_por('17202', 'FLOW', interval='DAILY_MEAN') - def test_get_cawdl_surface_water_site_report(self): + def deferred_test_get_cawdl_surface_water_site_report(self): cawdl.get_cawdl_surface_water_site_report('17202') class TestCDEC(unittest.TestCase): - def test_get_b120_data(self): + def deferred_test_get_b120_data(self): b120.get_b120_data(date_suffix='') - def test_validate_date_suffix(self): + def deferred_test_validate_date_suffix(self): b120.validate_date_suffix(date_suffix, min_year=2017) - def test_clean_td(self): + def deferred_test_clean_td(self): b120.clean_td(text) - def test_get_b120_update_data(self): + def deferred_test_get_b120_update_data(self): b120.get_b120_update_data(date_suffix='') - def test_get_120_archived_reports(self): + def deferred_test_get_120_archived_reports(self): b120.get_120_archived_reports(year, month) - def test_april_july_dataframe(self): + def deferred_test_april_july_dataframe(self): b120.april_july_dataframe(data_list) - def test_get_station_url(self): + def deferred_test_get_station_url(self): cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') - def test_get_station_sensors(self): + def deferred_test_get_station_sensors(self): cdec.get_station_sensors(station, start, end) - def test_get_station_data(self): + def deferred_test_get_station_data(self): cdec.get_station_data(station, start, end, sensors=[], duration='') - def test_get_raw_station_csv(self): + def deferred_test_get_raw_station_csv(self): cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') - def test_get_raw_station_json(self): + def 
deferred_test_get_raw_station_json(self): cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') - def test_get_sensor_frame(self): + def deferred_test_get_sensor_frame(self): cdec.get_sensor_frame(station, start, end, sensor='', duration='') - def test_get_station_metadata(self): + def deferred_test_get_station_metadata(self): cdec.get_station_metadata(station, as_geojson=False) - def test_get_dam_metadata(self): + def deferred_test_get_dam_metadata(self): cdec.get_dam_metadata(station) - def test_get_reservoir_metadata(self): + def deferred_test_get_reservoir_metadata(self): cdec.get_reservoir_metadata(station) - def test__get_table_index(self): + def deferred_test__get_table_index(self): cdec._get_table_index(table_type, tables) - def test__parse_station_generic_table(self): + def deferred_test__parse_station_generic_table(self): cdec._parse_station_generic_table(table) - def test__parse_station_sensors_table(self): + def deferred_test__parse_station_sensors_table(self): cdec._parse_station_sensors_table(table) - def test__parse_station_comments_table(self): + def deferred_test__parse_station_comments_table(self): cdec._parse_station_comments_table(table) - def test__parse_data_available(self): + def deferred_test__parse_data_available(self): cdec._parse_data_available(text) - def test_get_data(self): + def deferred_test_get_data(self): cdec.get_data(station, start, end, sensor='', duration='') - def test_get_daily_snowpack_data(self): + def deferred_test_get_daily_snowpack_data(self): cdec.get_daily_snowpack_data(region, start, end) class TestSWP(unittest.TestCase): - def test_prompt_installation_and_exit(self): + def deferred_test_prompt_installation_and_exit(self): swp.prompt_installation_and_exit() - def test_get_report_catalog(self): + def deferred_test_get_report_catalog(self): swp.get_report_catalog() - def test_get_report_url(self): + def deferred_test_get_report_url(self): swp.get_report_url() - def test_get_raw_text(self): + def deferred_test_get_raw_text(self): swp.get_raw_text() - def test_get_delta_daily_data(self): + def deferred_test_get_delta_daily_data(self): swp.get_delta_daily_data() - def test_get_barker_slough_data(self): + def deferred_test_get_barker_slough_data(self): swp.get_barker_slough_data() - def test_get_oco_tabular_data(self): + def deferred_test_get_oco_tabular_data(self): swp.get_oco_tabular_data() diff --git a/collect/tests/test_nid.py b/collect/tests/test_nid.py index 6a244cf..9051c59 100644 --- a/collect/tests/test_nid.py +++ b/collect/tests/test_nid.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_nid ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.nid data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt diff --git a/collect/tests/test_usace.py b/collect/tests/test_usace.py index 5fbe4c6..6e21411 100644 --- a/collect/tests/test_usace.py +++ b/collect/tests/test_usace.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_usace ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.usace data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import 
datetime as dt diff --git a/collect/tests/test_usgs.py b/collect/tests/test_usgs.py index 50015c4..c2469f2 100644 --- a/collect/tests/test_usgs.py +++ b/collect/tests/test_usgs.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_usgs ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.usgs data access and utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt diff --git a/collect/tests/test_utils.py b/collect/tests/test_utils.py index 583e5b5..ca6998d 100644 --- a/collect/tests/test_utils.py +++ b/collect/tests/test_utils.py @@ -1,7 +1,7 @@ """ -collect.tests.test_basics +collect.tests.test_utils ============================================================ -initial test suite for collect data access and utility functions; note: these tests require internet connection +initial test suite for collect.utils utility functions; note: these tests require internet connection """ # -*- coding: utf-8 -*- import datetime as dt From f10420adaed7552ac297197abd093369a54b163f Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 14:28:58 -0800 Subject: [PATCH 17/36] Add cross-version support for localizing naive datetimes --- collect/cnrfc/cnrfc.py | 18 +- collect/tests/test_cnrfc.py | 938 ++++++++++++++++++------------------ collect/utils/utils.py | 31 +- 3 files changed, 503 insertions(+), 484 deletions(-) diff --git a/collect/cnrfc/cnrfc.py b/collect/cnrfc/cnrfc.py index aebf6b0..2defa7a 100644 --- a/collect/cnrfc/cnrfc.py +++ b/collect/cnrfc/cnrfc.py @@ -17,18 +17,6 @@ from collect.cnrfc.gages import * from collect.utils import utils -try: - from zoneinfo import ZoneInfo - UTC = ZoneInfo('UTC') - PACIFIC = ZoneInfo('America/Los_Angeles') - -except: - import pytz - UTC = pytz.timezone('UTC') - PACIFIC = pytz.timezone('America/Los_Angeles') - -TODAY = dt.datetime.now().strftime('%Y%m%d') - # load credentials load_dotenv() @@ -981,9 +969,9 @@ def _apply_conversions(df, duration, acre_feet, pdt_convert, as_pdt): if pdt_convert: df.index = df.index.tz_localize('UTC').tz_convert('America/Los_Angeles') df.index.name = 'America/Los_Angeles' - + elif as_pdt: - df.index = [PACIFIC.localize(x) for x in df.index] + df.index = [utils.get_localized_datetime(x, 'America/Los_Angeles') for x in df.index] df.index.name = 'America/Los_Angeles' return df, units @@ -1088,7 +1076,7 @@ def _default_date_string(date_string): supply expected latest forecast datestamp or use defined date_string argument """ if date_string is None: - now = dt.datetime.now().astimezone(UTC) + now = utils.get_localized_datetime(dt.datetime.now(), 'UTC') date_string = now.strftime('%Y%m%d{0:02.0f}'.format(6 * math.floor(now.hour/6))) # hour validation diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index c9d233f..6ce8b3a 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -28,363 +28,363 @@ class TestCNRFC(unittest.TestCase): - @property - def deterministic_frame(self): - """ - fixture for testing watershed deterministic file handling - """ - if not hasattr(self, '_deterministic_frame'): - text_data = io.StringIO(textwrap.dedent("""\ - GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 - ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE - 2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 - 2019-03-30 
13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 - 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 - 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 - 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 - 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 - 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 - 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 - 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 - 2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 - 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 - 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 - 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 - 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 - 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 - 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 - 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 - 2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 - 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 - 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 - 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 - 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 - 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 - 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 - 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 - 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 - 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 - 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 - 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 - 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 - 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 - 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 - 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 - 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 - 2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 - 2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 - 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 - 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 - 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 - 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 - 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 - 2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 - 2019-04-01 
06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 - 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 - 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 - 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 - 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 - 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 - 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 - 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 - 2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 - 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 - 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 - 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 - 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 - 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 - 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 - 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) - self._deterministic_frame = pd.read_csv(text_data, - header=0, - skiprows=[1,], - nrows=60, - parse_dates=True, - index_col=0, - float_precision='high', - dtype={'GMT': str}).mul(1000) - return self._deterministic_frame - - def test_cnrfc_credentials(self): - """ - load sensitive info from .env file and test CNRFC credentials exist - """ - load_dotenv() - self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) - - def test_convert_date_columns(self): - """Ensure datetime data converted to string format""" - test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') - self.assertEqual(test_index.tolist()[0], '2019-03-30') - - def test_validate_duration(self): - """ - function to properly format/case hourly or daily durations - """ - duration = 'Hourly' - self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - - def test_validate_duration_invalid(self): - """ - test that invalid duration raises a ValueError - """ - bad_input = 'monthly' - self.assertRaises(ValueError, - cnrfc.cnrfc._validate_duration, - bad_input) - - def test_get_deterministic_forecast(self): - """ - Test that deterministic forecast start from Graphical_RVF page matches - CSV start of forecast - """ - cnrfc_id = 'FOLC1' - first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] - df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] - first_forecast_entry = df['forecast'].dropna().index.tolist()[0] - - # check that the date/time representation in the timestamp and datetime.datetime objects are the same - self.assertEqual(first_forecast_entry.year, first_ordinate.year) - self.assertEqual(first_forecast_entry.month, first_ordinate.month) - self.assertEqual(first_forecast_entry.day, first_ordinate.day) - self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) - self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) - - # for now, strip the local tzinfo from `first_ordinate` - self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) - - def test_get_deterministic_forecast_watershed(self): - """ - test watershed deterministic 
forecast download for North San Joaquin on a particular date; - additional future tests to add coverage for arguments: - - watershed - - date_string - - acre_feet=False - - pdt_convert=False - - as_pdt=False - - cnrfc_id=None - """ - df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] - self.assertEqual(df.head(20)['NHGC1'].values.tolist(), - self.deterministic_frame.head(20)['NHGC1'].values.tolist()) - self.assertIsNone(df.index.tzinfo) - - def test_get_water_year_trend_tabular(self): - """ - test water year trend tabular download for a past year for Folsom reservoir forecast point - """ - df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] - self.assertEqual(df.shape, (365, 9)) - - def test_get_seasonal_trend_tabular(self): - """ - test seasonal trend tabular download for a past year for Shasta reservoir forecast point - """ - df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] - self.assertEqual(df.shape, (365, 10)) - - def test_get_ensemble_forecast(self): - """ - test for current ensemble forecast file schema, using Vernalis forecast location - """ - result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) - self.assertEqual(result['data'].shape, (721, 43)) - self.assertIsNone(result['data'].index.tzinfo) - self.assertEqual(result['info']['watershed'], 'SanJoaquin') - self.assertEqual(result['info']['units'], 'cfs') - - def test_get_ensemble_product_1(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') - - def test_get_ensemble_product_3(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') - - def test_get_ensemble_product_5(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') - - def test_get_ensemble_product_11(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') - - def test_get_ensemble_product_12(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') - - def test_get_ensemble_product_13(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') - - def test_get_data_report_part_8(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) - - def test_get_monthly_reservoir_storage_summary(self): - """ - as this method is not yet implemented in the cnrfc module, it is expected to raise an error - """ - self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) - - def test_get_rating_curve(self): - """ - example expected output from get_rating_curve method - """ - result = cnrfc.get_rating_curve('DCSC1') - self.assertEqual(result['data'][0], (0.92, 0.45)) - self.assertEqual(result['data'][-1], (15.0, 16300.0)) - 
self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') - - def test_get_watershed(self): - """ - example usage for looking up watershed group by forecast point ID - """ - self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento') - - def test_get_forecast_meta_deterministic(self): - """ - test for predicted response with get_forecast_meta_deterministic for Oroville forecast point - """ - result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False) - self.assertIsInstance(result[0], (dt.date, dt.datetime)) - self.assertIsInstance(result[1], (dt.date, dt.datetime)) - self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)') - self.assertEqual(result[3], 'Impaired Inflows') - - def test_get_ensemble_product_2(self): - """ - test for the expected format of ensemble produce #2 - """ - result = cnrfc.get_ensemble_product_2('BDBC1') - self.assertEqual(result['info']['type'], 'Tabular 10-Day Streamflow Volume Accumulation') - self.assertEqual(result['info']['units'], 'TAF') - self.assertEqual(result['data'].shape, (6, 10)) - self.assertEqual(result['data'].index.tolist(), - ['10%', '25%', '50%(Median)', '75%', '90%', 'CNRFCDeterministic Forecast']) - - def test_get_watershed_forecast_issue_time(self): - # test for the long range ensemble product - self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('daily', - 'North Coast', - date_string=None, - deterministic=False), dt.datetime) - - # test for the hourly deterministic product - self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('hourly', - 'North Coast', - date_string=None, - deterministic=True), dt.datetime) - - # the value None is returned for a specified forecast issuance in the past - self.assertIsNone(cnrfc.get_watershed_forecast_issue_time('daily', - 'North Coast', - date_string='2023010112', - deterministic=False)) - - def test__default_date_string(self): - result = cnrfc.cnrfc._default_date_string(None) - result_dt = dt.datetime.strptime(result, '%Y%m%d%H') - self.assertIsInstance(result, str) - self.assertIsInstance(result_dt, dt.datetime) - self.assertIn(result_dt.hour, [0, 6, 12, 18]) - self.assertEqual(cnrfc.cnrfc._default_date_string('2023112818'), '2023112818') - self.assertRaises(ValueError, cnrfc.cnrfc._default_date_string, '2023112805') - - def test_get_ensemble_product_url(self): - self.assertEqual(cnrfc.get_ensemble_product_url(1, 'VNSC1', data_format=''), - 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=1') - self.assertEqual(cnrfc.get_ensemble_product_url(3, 'VNSC1', data_format=''), - 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=3') - self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular'), - 'https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=SHDC1&prodID=7') - - def test_get_ensemble_product_6(self): - """ - test download and parsing of monthly probability rainbow barchart plot for Shasta location - """ - result = cnrfc.get_ensemble_product_6('SHDC1') - self.assertEqual(result['data'].shape, (7, 12)) - self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%', '75%', '90%', 'Mean', '%Mean']) - self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=6') - self.assertEqual(result['info']['type'], 'Monthly Streamflow Volume (1000s of Acre-Feet)') - self.assertEqual(result['info']['units'], 'TAF') - - def test_get_ensemble_product_10(self): - """ - test download and parsing of water year 
accumulated volume plot for Shasta location
-        """
-        result = cnrfc.get_ensemble_product_10('SHDC1')
-        self.assertEqual(result['data'].shape, (5, 12))
-        self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%'])
-        self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=10')
-        self.assertEqual(result['info']['type'], 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation')
-        self.assertEqual(result['info']['units'], 'TAF')
-
-    def test__parse_blue_table(self):
-        """
-        test the processing of included data table for monthly summary associated with ensemble products like 2, 10, etc
-        """
-        table_soup = BeautifulSoup(textwrap.dedent("""\
-            <table>
-            <tr><td colspan="6">Title</td></tr>
-            <tr>
-                <td>Probability</td>
-                <td>Nov<br>29</td>
-                <td>Nov<br>30</td>
-                <td>Dec<br>01</td>
-                <td>Dec<br>02</td>
-                <td>Dec<br>03</td>
-            </tr>
-            <tr><td>10%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>49.0</td><td>62.5</td></tr>
-            <tr><td>25%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>48.9</td><td>61.4</td></tr>
-            <tr><td>50%<br>(Median)</td><td>12.2</td><td>24.4</td><td>36.6</td><td>48.9</td><td>61.2</td></tr>
-            </table>
- """), 'lxml') - result = cnrfc.cnrfc._parse_blue_table(table_soup) - self.assertIsInstance(result[0], pd.DataFrame) - self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) - self.assertIsInstance(result[1], list) + # @property + # def deterministic_frame(self): + # """ + # fixture for testing watershed deterministic file handling + # """ + # if not hasattr(self, '_deterministic_frame'): + # text_data = io.StringIO(textwrap.dedent("""\ + # GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 + # ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE + # 2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 + # 2019-03-30 13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 + # 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 + # 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 + # 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 + # 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 + # 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 + # 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 + # 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 + # 2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 + # 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 + # 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 + # 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 + # 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 + # 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 + # 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 + # 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 + # 2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 + # 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 + # 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 + # 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 + # 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 + # 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 + # 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 + # 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 + # 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 + # 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 + # 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 + # 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 + # 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 + # 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 + # 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 + # 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 + # 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 + # 2019-03-31 
22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 + # 2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 + # 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 + # 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 + # 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 + # 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 + # 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 + # 2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 + # 2019-04-01 06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 + # 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 + # 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 + # 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 + # 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 + # 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 + # 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 + # 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 + # 2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 + # 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 + # 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 + # 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 + # 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 + # 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 + # 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 + # 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) + # self._deterministic_frame = pd.read_csv(text_data, + # header=0, + # skiprows=[1,], + # nrows=60, + # parse_dates=True, + # index_col=0, + # float_precision='high', + # dtype={'GMT': str}).mul(1000) + # return self._deterministic_frame + + # def test_cnrfc_credentials(self): + # """ + # load sensitive info from .env file and test CNRFC credentials exist + # """ + # load_dotenv() + # self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) + + # def test_convert_date_columns(self): + # """Ensure datetime data converted to string format""" + # test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') + # self.assertEqual(test_index.tolist()[0], '2019-03-30') + + # def test_validate_duration(self): + # """ + # function to properly format/case hourly or daily durations + # """ + # duration = 'Hourly' + # self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') + + # def test_validate_duration_invalid(self): + # """ + # test that invalid duration raises a ValueError + # """ + # bad_input = 'monthly' + # self.assertRaises(ValueError, + # cnrfc.cnrfc._validate_duration, + # bad_input) + + # def test_get_deterministic_forecast(self): + # """ + # Test that deterministic forecast start from Graphical_RVF page matches + # CSV start of forecast + # """ + # cnrfc_id = 'FOLC1' + # first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] + # df = 
cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] + # first_forecast_entry = df['forecast'].dropna().index.tolist()[0] + + # # check that the date/time representation in the timestamp and datetime.datetime objects are the same + # self.assertEqual(first_forecast_entry.year, first_ordinate.year) + # self.assertEqual(first_forecast_entry.month, first_ordinate.month) + # self.assertEqual(first_forecast_entry.day, first_ordinate.day) + # self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) + # self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) + + # # for now, strip the local tzinfo from `first_ordinate` + # self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) + + # def test_get_deterministic_forecast_watershed(self): + # """ + # test watershed deterministic forecast download for North San Joaquin on a particular date; + # additional future tests to add coverage for arguments: + # - watershed + # - date_string + # - acre_feet=False + # - pdt_convert=False + # - as_pdt=False + # - cnrfc_id=None + # """ + # df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] + # self.assertEqual(df.head(20)['NHGC1'].values.tolist(), + # self.deterministic_frame.head(20)['NHGC1'].values.tolist()) + # self.assertIsNone(df.index.tzinfo) + + # def test_get_water_year_trend_tabular(self): + # """ + # test water year trend tabular download for a past year for Folsom reservoir forecast point + # """ + # df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] + # self.assertEqual(df.shape, (365, 9)) + + # def test_get_seasonal_trend_tabular(self): + # """ + # test seasonal trend tabular download for a past year for Shasta reservoir forecast point + # """ + # df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] + # self.assertEqual(df.shape, (365, 10)) + + # def test_get_ensemble_forecast(self): + # """ + # test for current ensemble forecast file schema, using Vernalis forecast location + # """ + # result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) + # self.assertEqual(result['data'].shape, (721, 43)) + # self.assertIsNone(result['data'].index.tzinfo) + # self.assertEqual(result['info']['watershed'], 'SanJoaquin') + # self.assertEqual(result['info']['units'], 'cfs') + + # def test_get_ensemble_product_1(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') + + # def test_get_ensemble_product_3(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') + + # def test_get_ensemble_product_5(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') + + # def test_get_ensemble_product_11(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') + + # def test_get_ensemble_product_12(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, 
cnrfc.get_ensemble_product_12, 'ORDC1') + + # def test_get_ensemble_product_13(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') + + # def test_get_data_report_part_8(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) + + # def test_get_monthly_reservoir_storage_summary(self): + # """ + # as this method is not yet implemented in the cnrfc module, it is expected to raise an error + # """ + # self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) + + # def test_get_rating_curve(self): + # """ + # example expected output from get_rating_curve method + # """ + # result = cnrfc.get_rating_curve('DCSC1') + # self.assertEqual(result['data'][0], (0.92, 0.45)) + # self.assertEqual(result['data'][-1], (15.0, 16300.0)) + # self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') + + # def test_get_watershed(self): + # """ + # example usage for looking up watershed group by forecast point ID + # """ + # self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento') + + # def test_get_forecast_meta_deterministic(self): + # """ + # test for predicted response with get_forecast_meta_deterministic for Oroville forecast point + # """ + # result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False) + # self.assertIsInstance(result[0], (dt.date, dt.datetime)) + # self.assertIsInstance(result[1], (dt.date, dt.datetime)) + # self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)') + # self.assertEqual(result[3], 'Impaired Inflows') + + # def test_get_ensemble_product_2(self): + # """ + # test for the expected format of ensemble produce #2 + # """ + # result = cnrfc.get_ensemble_product_2('BDBC1') + # self.assertEqual(result['info']['type'], 'Tabular 10-Day Streamflow Volume Accumulation') + # self.assertEqual(result['info']['units'], 'TAF') + # self.assertEqual(result['data'].shape, (6, 10)) + # self.assertEqual(result['data'].index.tolist(), + # ['10%', '25%', '50%(Median)', '75%', '90%', 'CNRFCDeterministic Forecast']) + + # def test_get_watershed_forecast_issue_time(self): + # # test for the long range ensemble product + # self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('daily', + # 'North Coast', + # date_string=None, + # deterministic=False), dt.datetime) + + # # test for the hourly deterministic product + # self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('hourly', + # 'North Coast', + # date_string=None, + # deterministic=True), dt.datetime) + + # # the value None is returned for a specified forecast issuance in the past + # self.assertIsNone(cnrfc.get_watershed_forecast_issue_time('daily', + # 'North Coast', + # date_string='2023010112', + # deterministic=False)) + + # def test__default_date_string(self): + # result = cnrfc.cnrfc._default_date_string(None) + # result_dt = dt.datetime.strptime(result, '%Y%m%d%H') + # self.assertIsInstance(result, str) + # self.assertIsInstance(result_dt, dt.datetime) + # self.assertIn(result_dt.hour, [0, 6, 12, 18]) + # self.assertEqual(cnrfc.cnrfc._default_date_string('2023112818'), '2023112818') + # self.assertRaises(ValueError, cnrfc.cnrfc._default_date_string, '2023112805') + + # def test_get_ensemble_product_url(self): + # 
self.assertEqual(cnrfc.get_ensemble_product_url(1, 'VNSC1', data_format=''),
+    #                      'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=1')
+    #     self.assertEqual(cnrfc.get_ensemble_product_url(3, 'VNSC1', data_format=''),
+    #                      'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=3')
+    #     self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular'),
+    #                      'https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=SHDC1&prodID=7')
+
+    # def test_get_ensemble_product_6(self):
+    #     """
+    #     test download and parsing of monthly probability rainbow barchart plot for Shasta location
+    #     """
+    #     result = cnrfc.get_ensemble_product_6('SHDC1')
+    #     self.assertEqual(result['data'].shape, (7, 12))
+    #     self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%', '75%', '90%', 'Mean', '%Mean'])
+    #     self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=6')
+    #     self.assertEqual(result['info']['type'], 'Monthly Streamflow Volume (1000s of Acre-Feet)')
+    #     self.assertEqual(result['info']['units'], 'TAF')
+
+    # def test_get_ensemble_product_10(self):
+    #     """
+    #     test download and parsing of water year accumulated volume plot for Shasta location
+    #     """
+    #     result = cnrfc.get_ensemble_product_10('SHDC1')
+    #     self.assertEqual(result['data'].shape, (5, 12))
+    #     self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%'])
+    #     self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=10')
+    #     self.assertEqual(result['info']['type'], 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation')
+    #     self.assertEqual(result['info']['units'], 'TAF')
+
+    # def test__parse_blue_table(self):
+    #     """
+    #     test the processing of included data table for monthly summary associated with ensemble products like 2, 10, etc
+    #     """
+    #     table_soup = BeautifulSoup(textwrap.dedent("""\
+    #         <table>
+    #         <tr><td colspan="6">Title</td></tr>
+    #         <tr>
+    #             <td>Probability</td>
+    #             <td>Nov<br>29</td>
+    #             <td>Nov<br>30</td>
+    #             <td>Dec<br>01</td>
+    #             <td>Dec<br>02</td>
+    #             <td>Dec<br>03</td>
+    #         </tr>
+    #         <tr><td>10%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>49.0</td><td>62.5</td></tr>
+    #         <tr><td>25%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>48.9</td><td>61.4</td></tr>
+    #         <tr><td>50%<br>(Median)</td><td>12.2</td><td>24.4</td><td>36.6</td><td>48.9</td><td>61.2</td></tr>
+    #         </table>
+ # """), 'lxml') + # result = cnrfc.cnrfc._parse_blue_table(table_soup) + # self.assertIsInstance(result[0], pd.DataFrame) + # self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) + # self.assertIsInstance(result[1], list) def test__apply_conversions(self): """ @@ -407,119 +407,121 @@ def test__apply_conversions(self): # test for conversion of timezone and kcfs -> cfs result = cnrfc.cnrfc._apply_conversions(df, 'hourly', False, True, True) self.assertIsInstance(result[0], pd.DataFrame) - self.assertEqual(pd.to_datetime(result[0].first_valid_index()), - dt.datetime(2023, 11, 1, 5, tzinfo=tz_function('America/Los_Angeles'))) - self.assertEqual(result[1], 'cfs') - def test_get_ensemble_forecast_watershed(self): - """ - test for retrieiving an ensemble forecast watershed file for a forecast issuance prior to most recent - """ - result = cnrfc.get_ensemble_forecast_watershed('SalinasPajaro', - 'hourly', - '2023010118', - acre_feet=False, - pdt_convert=False, - as_pdt=False, - cnrfc_id=None) - self.assertEqual(result['data'].shape, (721, 924)) - self.assertEqual(result['data'].tail(1)['BTEC1'].values[0], 226.94) - self.assertEqual(pd.to_datetime(result['data'].last_valid_index()), dt.datetime(2023, 1, 31, 18, 0, 0)) - self.assertEqual(result['info']['watershed'], 'SalinasPajaro') - self.assertEqual(result['info']['url'], - 'https://www.cnrfc.noaa.gov/csv/2023010118_SalinasPajaro_hefs_csv_hourly.zip') - self.assertIsNone(result['info']['issue_time']) - - def test_get_esp_trace_analysis_url(self): - """ - test that the build-your-own trace analysis product url is properly constructed for the provided options - """ - url = cnrfc.get_esp_trace_analysis_url('BTYO3', - interval='day', - value_type='mean', - plot_type='traces', - table_type='forecastInfo', - product_type='table', - date_string='20231106', - end_date_string='20231231') - expected_url = '&'.join(['https://www.cnrfc.noaa.gov/ensembleProduct.php?id=BTYO3', - 'prodID=8', - 'interval=day', - 'valueType=mean', - 'plotType=traces', - 'tableType=forecastInfo', - 'productType=table', - 'dateSelection=custom', - 'date=20231106', - 'endDate=20231231']) - self.maxDiff = 800 - self.assertEqual(url, expected_url) - - def test_get_ensemble_first_forecast_ordinate(self): - """ - test that the first ensemble forecast ordinate is a datetime in the past - """ - result = cnrfc.get_ensemble_first_forecast_ordinate( - url='https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv', - df=None - ) - self.assertIsInstance(result, dt.datetime) - result_utc = result.replace(tzinfo=tz_function('UTC')) - self.assertLess(result_utc, dt.datetime.now(tz=tz_function('UTC'))) - - def test__get_forecast_csv(self): - """ - test for forecast CSV data retrieval to in-memory filelike object (private method) - """ - result = cnrfc.cnrfc._get_forecast_csv('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') - self.assertIsInstance(result, io.BytesIO) - - # check first line contains forecast point headers - self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) - - # check second line contains variables identifiers - self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - - # check third line contains expected timeseries info - self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) - - def test_get_forecast_csvdata(self): - """ - test for forecast CSV data retrieval to in-memory filelike object (public method); duplicate of - test__get_forecast_csv - """ - result 
= cnrfc.get_forecast_csvdata('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') - self.assertIsInstance(result, io.BytesIO) - self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) - self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) - - def test__get_cnrfc_restricted_content(self): - """ - test that restricted content can be accessed through the provided credentials - """ - result = cnrfc.cnrfc._get_cnrfc_restricted_content( - 'https://www.cnrfc.noaa.gov/restricted/graphicalRVF_tabular.php?id=FOLC1' - ) - sample = BeautifulSoup(result, 'lxml').find('pre').text.splitlines()[:9] - self.assertEqual(sample[2], '# Location: American River - Folsom Lake (FOLC1)') - self.assertTrue(sample[-1].startswith('# Maximum Observed Flow:')) - - def test_download_watershed_file(self): - """ - test for downloading watershed file to local file system (in this case, downloaded to in-memory object) - """ - result = cnrfc.download_watershed_file('WSI', '2023010112', 'ensemble', duration='daily', path=io.BytesIO()) - self.assertIsInstance(result, io.BytesIO) - - # check first line contains forecast point headers - self.assertTrue(result.readline().decode('utf-8').startswith('GMT,SACC0')) - - # check second line contains variables identifiers - self.assertTrue(result.readline().decode('utf-8').startswith(',SQME,SQME')) + # create a localized datetime with either pytz or zoneinfo modules + expected_dt = utils.get_localized_datetime(dt.datetime(2023, 11, 1, 5), 'America/Los_Angeles') + self.assertEqual(result[0].first_valid_index().to_pydatetime(), expected_dt) + self.assertEqual(result[1], 'cfs') - # check third line contains expected timeseries info - self.assertTrue(result.readline().decode('utf-8').startswith('2023-01-01 12:00:00,252.83904,')) + # def test_get_ensemble_forecast_watershed(self): + # """ + # test for retrieiving an ensemble forecast watershed file for a forecast issuance prior to most recent + # """ + # result = cnrfc.get_ensemble_forecast_watershed('SalinasPajaro', + # 'hourly', + # '2023010118', + # acre_feet=False, + # pdt_convert=False, + # as_pdt=False, + # cnrfc_id=None) + # self.assertEqual(result['data'].shape, (721, 924)) + # self.assertEqual(result['data'].tail(1)['BTEC1'].values[0], 226.94) + # self.assertEqual(pd.to_datetime(result['data'].last_valid_index()), dt.datetime(2023, 1, 31, 18, 0, 0)) + # self.assertEqual(result['info']['watershed'], 'SalinasPajaro') + # self.assertEqual(result['info']['url'], + # 'https://www.cnrfc.noaa.gov/csv/2023010118_SalinasPajaro_hefs_csv_hourly.zip') + # self.assertIsNone(result['info']['issue_time']) + + # def test_get_esp_trace_analysis_url(self): + # """ + # test that the build-your-own trace analysis product url is properly constructed for the provided options + # """ + # url = cnrfc.get_esp_trace_analysis_url('BTYO3', + # interval='day', + # value_type='mean', + # plot_type='traces', + # table_type='forecastInfo', + # product_type='table', + # date_string='20231106', + # end_date_string='20231231') + # expected_url = '&'.join(['https://www.cnrfc.noaa.gov/ensembleProduct.php?id=BTYO3', + # 'prodID=8', + # 'interval=day', + # 'valueType=mean', + # 'plotType=traces', + # 'tableType=forecastInfo', + # 'productType=table', + # 'dateSelection=custom', + # 'date=20231106', + # 'endDate=20231231']) + # self.maxDiff = 800 + # self.assertEqual(url, expected_url) + + # def 
test_get_ensemble_first_forecast_ordinate(self): + # """ + # test that the first ensemble forecast ordinate is a datetime in the past + # """ + # result = cnrfc.get_ensemble_first_forecast_ordinate( + # url='https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv', + # df=None + # ) + # self.assertIsInstance(result, dt.datetime) + # result_utc = result.replace(tzinfo=tz_function('UTC')) + # self.assertLess(result_utc, dt.datetime.now(tz=tz_function('UTC'))) + + # def test__get_forecast_csv(self): + # """ + # test for forecast CSV data retrieval to in-memory filelike object (private method) + # """ + # result = cnrfc.cnrfc._get_forecast_csv('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') + # self.assertIsInstance(result, io.BytesIO) + + # # check first line contains forecast point headers + # self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) + + # # check second line contains variables identifiers + # self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) + + # # check third line contains expected timeseries info + # self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + # def test_get_forecast_csvdata(self): + # """ + # test for forecast CSV data retrieval to in-memory filelike object (public method); duplicate of + # test__get_forecast_csv + # """ + # result = cnrfc.get_forecast_csvdata('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') + # self.assertIsInstance(result, io.BytesIO) + # self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) + # self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) + # self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + # def test__get_cnrfc_restricted_content(self): + # """ + # test that restricted content can be accessed through the provided credentials + # """ + # result = cnrfc.cnrfc._get_cnrfc_restricted_content( + # 'https://www.cnrfc.noaa.gov/restricted/graphicalRVF_tabular.php?id=FOLC1' + # ) + # sample = BeautifulSoup(result, 'lxml').find('pre').text.splitlines()[:9] + # self.assertEqual(sample[2], '# Location: American River - Folsom Lake (FOLC1)') + # self.assertTrue(sample[-1].startswith('# Maximum Observed Flow:')) + + # def test_download_watershed_file(self): + # """ + # test for downloading watershed file to local file system (in this case, downloaded to in-memory object) + # """ + # result = cnrfc.download_watershed_file('WSI', '2023010112', 'ensemble', duration='daily', path=io.BytesIO()) + # self.assertIsInstance(result, io.BytesIO) + + # # check first line contains forecast point headers + # self.assertTrue(result.readline().decode('utf-8').startswith('GMT,SACC0')) + + # # check second line contains variables identifiers + # self.assertTrue(result.readline().decode('utf-8').startswith(',SQME,SQME')) + + # # check third line contains expected timeseries info + # self.assertTrue(result.readline().decode('utf-8').startswith('2023-01-01 12:00:00,252.83904,')) if __name__ == '__main__': diff --git a/collect/utils/utils.py b/collect/utils/utils.py index f44e033..dae90ab 100644 --- a/collect/utils/utils.py +++ b/collect/utils/utils.py @@ -18,6 +18,15 @@ from requests.adapters import HTTPAdapter +# alternate timezone representation depending on Python version +try: + from zoneinfo import ZoneInfo + tz_function = ZoneInfo +except: + from pytz import timezone + tz_function = timezone + + def get_session_response(url, auth=None): """ Arguments: @@ -76,7 
+85,27 @@ def get_water_year(datetime_structure): Arguments: datetime_structure (datetime.datetime): a Python datetime + Returns: + water_year (int): the water year for the provided datetime """ if datetime_structure.month < 10: return datetime_structure.year - return datetime_structure.year + 1 \ No newline at end of file + return datetime_structure.year + 1 + + +def get_localized_datetime(naive_datetime, timezone_string): + """ + provides cross-version support for python versions before existence of zoneinfo module + + Arguments: + naive_datetime (datetime.datetime): a datetime without any timezone information + timezone_string (str): the string identifier for the desired timezone (i.e. 'UTC' or 'US/Pacific') + Returns: + result (datetime.datetime): a python datetime structure with timezone localization + """ + try: + expected_tz = timezone(timezone_string) + result = expected_tz.localize(naive_datetime) + except: + result = naive_datetime.replace(tzinfo=ZoneInfo(timezone_string)) + return result From 193ac851734bae57363bb00606a3df5836414ad1 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 14:34:25 -0800 Subject: [PATCH 18/36] Update imports in test files --- collect/tests/test_alert.py | 1 - collect/tests/test_cnrfc.py | 943 ++++++++++++++++++------------------ collect/tests/test_nid.py | 1 - collect/tests/test_usace.py | 1 - collect/tests/test_usgs.py | 1 - collect/tests/test_utils.py | 4 - 6 files changed, 467 insertions(+), 484 deletions(-) diff --git a/collect/tests/test_alert.py b/collect/tests/test_alert.py index d054eed..9c1d426 100644 --- a/collect/tests/test_alert.py +++ b/collect/tests/test_alert.py @@ -6,7 +6,6 @@ # -*- coding: utf-8 -*- import datetime as dt import unittest -import unittest.mock from collect import alert diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index 6ce8b3a..6f9b448 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -9,382 +9,373 @@ import os import textwrap import unittest -import unittest.mock from bs4 import BeautifulSoup from dotenv import load_dotenv import pandas as pd -from collect import cnrfc - -# alternate timezone representation depending on Python version -try: - from zoneinfo import ZoneInfo - tz_function = ZoneInfo -except: - from pytz import timezone - tz_function = timezone +from collect import cnrfc, utils class TestCNRFC(unittest.TestCase): - # @property - # def deterministic_frame(self): - # """ - # fixture for testing watershed deterministic file handling - # """ - # if not hasattr(self, '_deterministic_frame'): - # text_data = io.StringIO(textwrap.dedent("""\ - # GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 - # ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE - # 2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 - # 2019-03-30 13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 - # 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 - # 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 - # 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 - # 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 - # 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 - # 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 - # 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 - # 2019-03-30 
21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 - # 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 - # 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 - # 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 - # 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 - # 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 - # 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 - # 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 - # 2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 - # 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 - # 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 - # 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 - # 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 - # 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 - # 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 - # 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 - # 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 - # 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 - # 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 - # 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 - # 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 - # 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 - # 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 - # 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 - # 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 - # 2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 - # 2019-03-31 23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 - # 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 - # 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 - # 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 - # 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 - # 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 - # 2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 - # 2019-04-01 06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 - # 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 - # 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 - # 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 - # 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 - # 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 - # 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 - # 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 - # 
2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 - # 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 - # 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 - # 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 - # 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 - # 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 - # 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 - # 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) - # self._deterministic_frame = pd.read_csv(text_data, - # header=0, - # skiprows=[1,], - # nrows=60, - # parse_dates=True, - # index_col=0, - # float_precision='high', - # dtype={'GMT': str}).mul(1000) - # return self._deterministic_frame - - # def test_cnrfc_credentials(self): - # """ - # load sensitive info from .env file and test CNRFC credentials exist - # """ - # load_dotenv() - # self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) - - # def test_convert_date_columns(self): - # """Ensure datetime data converted to string format""" - # test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') - # self.assertEqual(test_index.tolist()[0], '2019-03-30') - - # def test_validate_duration(self): - # """ - # function to properly format/case hourly or daily durations - # """ - # duration = 'Hourly' - # self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') - - # def test_validate_duration_invalid(self): - # """ - # test that invalid duration raises a ValueError - # """ - # bad_input = 'monthly' - # self.assertRaises(ValueError, - # cnrfc.cnrfc._validate_duration, - # bad_input) - - # def test_get_deterministic_forecast(self): - # """ - # Test that deterministic forecast start from Graphical_RVF page matches - # CSV start of forecast - # """ - # cnrfc_id = 'FOLC1' - # first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] - # df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] - # first_forecast_entry = df['forecast'].dropna().index.tolist()[0] - - # # check that the date/time representation in the timestamp and datetime.datetime objects are the same - # self.assertEqual(first_forecast_entry.year, first_ordinate.year) - # self.assertEqual(first_forecast_entry.month, first_ordinate.month) - # self.assertEqual(first_forecast_entry.day, first_ordinate.day) - # self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) - # self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) - - # # for now, strip the local tzinfo from `first_ordinate` - # self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) - - # def test_get_deterministic_forecast_watershed(self): - # """ - # test watershed deterministic forecast download for North San Joaquin on a particular date; - # additional future tests to add coverage for arguments: - # - watershed - # - date_string - # - acre_feet=False - # - pdt_convert=False - # - as_pdt=False - # - cnrfc_id=None - # """ - # df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] - # self.assertEqual(df.head(20)['NHGC1'].values.tolist(), - # self.deterministic_frame.head(20)['NHGC1'].values.tolist()) - # self.assertIsNone(df.index.tzinfo) - - # def test_get_water_year_trend_tabular(self): - # """ 
- # test water year trend tabular download for a past year for Folsom reservoir forecast point - # """ - # df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] - # self.assertEqual(df.shape, (365, 9)) - - # def test_get_seasonal_trend_tabular(self): - # """ - # test seasonal trend tabular download for a past year for Shasta reservoir forecast point - # """ - # df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] - # self.assertEqual(df.shape, (365, 10)) - - # def test_get_ensemble_forecast(self): - # """ - # test for current ensemble forecast file schema, using Vernalis forecast location - # """ - # result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) - # self.assertEqual(result['data'].shape, (721, 43)) - # self.assertIsNone(result['data'].index.tzinfo) - # self.assertEqual(result['info']['watershed'], 'SanJoaquin') - # self.assertEqual(result['info']['units'], 'cfs') - - # def test_get_ensemble_product_1(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') - - # def test_get_ensemble_product_3(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') - - # def test_get_ensemble_product_5(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') - - # def test_get_ensemble_product_11(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') - - # def test_get_ensemble_product_12(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') - - # def test_get_ensemble_product_13(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') - - # def test_get_data_report_part_8(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8) - - # def test_get_monthly_reservoir_storage_summary(self): - # """ - # as this method is not yet implemented in the cnrfc module, it is expected to raise an error - # """ - # self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary) - - # def test_get_rating_curve(self): - # """ - # example expected output from get_rating_curve method - # """ - # result = cnrfc.get_rating_curve('DCSC1') - # self.assertEqual(result['data'][0], (0.92, 0.45)) - # self.assertEqual(result['data'][-1], (15.0, 16300.0)) - # self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') - - # def test_get_watershed(self): - # """ - # example usage for looking up watershed group by forecast point ID - # """ - # self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento') - - # def test_get_forecast_meta_deterministic(self): - # """ - # test for predicted response with 
get_forecast_meta_deterministic for Oroville forecast point - # """ - # result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False) - # self.assertIsInstance(result[0], (dt.date, dt.datetime)) - # self.assertIsInstance(result[1], (dt.date, dt.datetime)) - # self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)') - # self.assertEqual(result[3], 'Impaired Inflows') - - # def test_get_ensemble_product_2(self): - # """ - # test for the expected format of ensemble produce #2 - # """ - # result = cnrfc.get_ensemble_product_2('BDBC1') - # self.assertEqual(result['info']['type'], 'Tabular 10-Day Streamflow Volume Accumulation') - # self.assertEqual(result['info']['units'], 'TAF') - # self.assertEqual(result['data'].shape, (6, 10)) - # self.assertEqual(result['data'].index.tolist(), - # ['10%', '25%', '50%(Median)', '75%', '90%', 'CNRFCDeterministic Forecast']) - - # def test_get_watershed_forecast_issue_time(self): - # # test for the long range ensemble product - # self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('daily', - # 'North Coast', - # date_string=None, - # deterministic=False), dt.datetime) - - # # test for the hourly deterministic product - # self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('hourly', - # 'North Coast', - # date_string=None, - # deterministic=True), dt.datetime) - - # # the value None is returned for a specified forecast issuance in the past - # self.assertIsNone(cnrfc.get_watershed_forecast_issue_time('daily', - # 'North Coast', - # date_string='2023010112', - # deterministic=False)) - - # def test__default_date_string(self): - # result = cnrfc.cnrfc._default_date_string(None) - # result_dt = dt.datetime.strptime(result, '%Y%m%d%H') - # self.assertIsInstance(result, str) - # self.assertIsInstance(result_dt, dt.datetime) - # self.assertIn(result_dt.hour, [0, 6, 12, 18]) - # self.assertEqual(cnrfc.cnrfc._default_date_string('2023112818'), '2023112818') - # self.assertRaises(ValueError, cnrfc.cnrfc._default_date_string, '2023112805') - - # def test_get_ensemble_product_url(self): - # self.assertEqual(cnrfc.get_ensemble_product_url(1, 'VNSC1', data_format=''), - # 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=1') - # self.assertEqual(cnrfc.get_ensemble_product_url(3, 'VNSC1', data_format=''), - # 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=3') - # self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular'), - # 'https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=SHDC1&prodID=7') - - # def test_get_ensemble_product_6(self): - # """ - # test download and parsing of monthly probability rainbow barchart plot for Shasta location - # """ - # result = cnrfc.get_ensemble_product_6('SHDC1') - # self.assertEqual(result['data'].shape, (7, 12)) - # self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%', '75%', '90%', 'Mean', '%Mean']) - # self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=6') - # self.assertEqual(result['info']['type'], 'Monthly Streamflow Volume (1000s of Acre-Feet)') - # self.assertEqual(result['info']['units'], 'TAF') - - # def test_get_ensemble_product_10(self): - # """ - # test download and parsing of water year accumulated volume plot for Shasta location - # """ - # result = cnrfc.get_ensemble_product_10('SHDC1') - # self.assertEqual(result['data'].shape, (5, 12)) - # self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%']) - 
# self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=10')
- # self.assertEqual(result['info']['type'], 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation')
- # self.assertEqual(result['info']['units'], 'TAF')
-
- # def test__parse_blue_table(self):
- # """
- # test the processing of included data table for monthly summary associated with ensemble products like 2, 10, etc
- # """
- # table_soup = BeautifulSoup(textwrap.dedent("""\
- # <table>
- # <tr><td>Title</td></tr>
- # <tr><td>Probability</td><td>Nov<br/>29</td><td>Nov<br/>30</td><td>Dec<br/>01</td><td>Dec<br/>02</td><td>Dec<br/>03</td></tr>
- # <tr><td>10%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>49.0</td><td>62.5</td></tr>
- # <tr><td>25%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>48.9</td><td>61.4</td></tr>
- # <tr><td>50%<br/>(Median)</td>
- # """), 'lxml') - # result = cnrfc.cnrfc._parse_blue_table(table_soup) - # self.assertIsInstance(result[0], pd.DataFrame) - # self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) - # self.assertIsInstance(result[1], list) + @property + def deterministic_frame(self): + """ + fixture for testing watershed deterministic file handling + """ + if not hasattr(self, '_deterministic_frame'): + text_data = io.StringIO(textwrap.dedent("""\ + GMT,CMPC1,NHGC1,MSGC1,FRGC1,EDOC1,SOSC1,MHBC1,MCNC1 + ,QINE,QINE,QINE,QINE,QINE,QINE,QINE,QINE + 2019-03-30 12:00:00,2.45972,0.70641,0.08901,0.22803,1.03512,0.71908,2.83132,2.58248 + 2019-03-30 13:00:00,2.44774,0.67366,0.08901,0.21302,1.03512,0.70908,2.88032,2.56875 + 2019-03-30 14:00:00,2.43568,0.67408,0.08901,0.19602,1.03011,0.71208,2.84732,2.53694 + 2019-03-30 15:00:00,2.42353,0.67424,0.08901,0.22903,1.02611,0.70608,2.83132,2.52791 + 2019-03-30 16:00:00,2.41129,0.67558,0.08901,0.20202,1.02211,0.70208,2.83132,2.50098 + 2019-03-30 17:00:00,2.39895,0.60832,0.08901,0.21002,1.01811,0.70208,2.81431,2.4876 + 2019-03-30 18:00:00,2.38652,0.64266,0.08901,0.18302,1.00911,0.69608,2.83132,2.46544 + 2019-03-30 19:00:00,2.38077,0.67591,0.08701,0.20202,1.00511,0.69208,2.79831,2.45222 + 2019-03-30 20:00:00,2.37473,0.67491,0.08701,0.18602,1.00111,0.69208,2.79831,2.44343 + 2019-03-30 21:00:00,2.36843,0.67599,0.08601,0.19602,0.99211,0.68908,2.79831,2.42595 + 2019-03-30 22:00:00,2.36185,0.67599,0.08601,0.03374,0.99211,0.68208,2.74931,2.41724 + 2019-03-30 23:00:00,2.35498,0.71033,0.08601,0.19102,0.98411,0.68208,2.78231,2.40856 + 2019-03-31 00:00:00,2.34785,0.67608,0.08401,0.16702,0.98011,0.67608,2.74931,2.39559 + 2019-03-31 01:00:00,2.32832,0.67508,0.08401,0.19902,0.97111,0.66607,2.7163,2.38698 + 2019-03-31 02:00:00,2.30886,0.67608,0.08401,0.16302,0.96311,0.65907,2.7003,2.36982 + 2019-03-31 03:00:00,2.28949,0.64274,0.08401,0.19302,0.96311,0.65607,2.7163,2.36555 + 2019-03-31 04:00:00,2.2702,0.6084,0.08401,0.03239,0.95511,0.66907,2.7163,2.34852 + 2019-03-31 05:00:00,2.25098,0.60724,0.08401,0.17702,0.94711,0.65907,2.6843,2.34004 + 2019-03-31 06:00:00,2.23185,0.64141,0.08401,0.15302,0.9261,0.65907,2.6683,2.33159 + 2019-03-31 07:00:00,2.22434,0.60915,0.08401,0.16402,0.9141,0.65607,2.6843,2.31896 + 2019-03-31 08:00:00,2.21675,0.5749,0.08201,0.17202,0.9141,0.66207,2.62029,2.3022 + 2019-03-31 09:00:00,2.2091,0.60815,0.08201,0.15802,0.9101,0.65907,2.63629,2.2897 + 2019-03-31 10:00:00,2.20137,0.64241,0.08101,0.16702,0.9141,0.65907,2.58829,2.27725 + 2019-03-31 11:00:00,2.19357,0.60924,0.08101,0.16802,0.9141,0.65907,2.57229,2.26486 + 2019-03-31 12:00:00,2.1857,0.57507,0.08101,0.15402,0.9101,0.65307,2.57229,2.25253 + 2019-03-31 13:00:00,2.17421,0.60832,0.08101,0.15102,0.9141,0.65307,2.58829,2.23544 + 2019-03-31 14:00:00,2.16274,0.64257,0.08101,0.18902,0.9101,0.65607,2.55728,2.21627 + 2019-03-31 15:00:00,2.15131,0.60832,0.08101,0.03094,0.9101,0.64907,2.57229,2.20199 + 2019-03-31 16:00:00,2.1399,0.54081,0.08101,0.14802,0.9061,0.64307,2.55728,2.18779 + 2019-03-31 17:00:00,2.12853,0.54081,0.08101,0.03072,0.9061,0.64607,2.57229,2.16429 + 2019-03-31 18:00:00,2.11718,0.57515,0.08101,0.14502,0.8981,0.64607,2.57229,2.15495 + 2019-03-31 19:00:00,2.11344,0.57523,0.08101,0.15802,0.9021,0.64007,2.55728,2.13637 + 2019-03-31 20:00:00,2.10957,0.57531,0.07901,0.14302,0.8981,0.64307,2.54128,2.13174 + 2019-03-31 21:00:00,2.10557,0.5764,0.07901,0.16502,0.8861,0.63707,2.55728,2.12713 + 2019-03-31 22:00:00,2.10143,0.63047,0.07901,0.15202,0.8901,0.62707,2.54128,2.11793 + 2019-03-31 
23:00:00,2.09715,0.6617,0.07901,0.13502,0.8821,0.62707,2.54128,2.11793 + 2019-04-01 00:00:00,2.09274,0.64507,0.07901,0.03001,0.8781,0.61807,2.51028,2.11334 + 2019-04-01 01:00:00,2.08882,0.61182,0.07701,0.02992,0.8741,0.62107,2.52628,2.10875 + 2019-04-01 02:00:00,2.08483,0.51206,0.07701,0.02983,0.8701,0.61807,2.49528,2.09962 + 2019-04-01 03:00:00,2.08079,0.51205,0.07701,0.02974,0.8661,0.61207,2.48028,2.09506 + 2019-04-01 04:00:00,2.07668,0.51206,0.07701,0.02964,0.8621,0.61207,2.49528,2.09051 + 2019-04-01 05:00:00,2.07251,0.51206,0.07701,0.02955,0.8541,0.61507,2.48028,2.08144 + 2019-04-01 06:00:00,2.06829,0.51206,0.07701,0.02946,0.85109,0.62107,2.44927,2.07692 + 2019-04-01 07:00:00,2.07789,0.51206,0.07701,0.13001,0.84709,0.62407,2.43427,2.0679 + 2019-04-01 08:00:00,2.08712,0.51206,0.07701,0.02929,0.84709,0.63007,2.44927,2.0634 + 2019-04-01 09:00:00,2.09597,0.51206,0.07701,0.13502,0.84709,0.62107,2.41927,2.04996 + 2019-04-01 10:00:00,2.10444,0.50556,0.07701,0.02911,0.84709,0.63407,2.43427,2.04104 + 2019-04-01 11:00:00,2.11255,0.60507,0.07601,0.02903,0.84709,0.63407,2.41927,2.02772 + 2019-04-01 12:00:00,2.12029,0.63774,0.07601,0.02894,0.84709,0.62707,2.41927,2.01888 + 2019-04-01 13:00:00,2.12346,0.59182,0.07601,0.11601,0.85109,0.63707,2.38927,2.00568 + 2019-04-01 14:00:00,2.12662,0.55896,0.07601,0.11201,0.85109,0.63407,2.41927,1.99255 + 2019-04-01 15:00:00,2.1298,0.57073,0.07401,0.12301,0.85109,0.62707,2.40427,1.98384 + 2019-04-01 16:00:00,2.13297,0.5924,0.07401,0.12401,0.85109,0.63007,2.43427,1.97516 + 2019-04-01 17:00:00,2.13613,0.54539,0.07401,0.12901,0.84709,0.62707,2.41927,1.96652 + 2019-04-01 18:00:00,2.13929,0.53298,0.07401,0.12101,0.85109,0.63007,2.25725,1.95791 + 2019-04-01 19:00:00,2.14021,0.56206,0.07301,0.10801,0.84309,0.62107,2.25725,1.95791 + 2019-04-01 20:00:00,2.14111,0.56231,0.07301,0.12001,0.84309,0.62107,2.27225,1.95361 + 2019-04-01 21:00:00,2.142,0.52906,0.07301,0.10601,0.83909,0.61807,2.27225,1.94932""")) + self._deterministic_frame = pd.read_csv(text_data, + header=0, + skiprows=[1,], + nrows=60, + parse_dates=True, + index_col=0, + float_precision='high', + dtype={'GMT': str}).mul(1000) + return self._deterministic_frame + + def test_cnrfc_credentials(self): + """ + load sensitive info from .env file and test CNRFC credentials exist + """ + load_dotenv() + self.assertTrue(('CNRFC_USER' in os.environ) & ('CNRFC_PASSWORD' in os.environ)) + + def test_convert_date_columns(self): + """Ensure datetime data converted to string format""" + test_index = self.deterministic_frame.index.strftime('%Y-%m-%d') + self.assertEqual(test_index.tolist()[0], '2019-03-30') + + def test_validate_duration(self): + """ + function to properly format/case hourly or daily durations + """ + duration = 'Hourly' + self.assertEqual(cnrfc.cnrfc._validate_duration(duration), 'hourly') + + def test_validate_duration_invalid(self): + """ + test that invalid duration raises a ValueError + """ + bad_input = 'monthly' + self.assertRaises(ValueError, + cnrfc.cnrfc._validate_duration, + bad_input) + + def test_get_deterministic_forecast(self): + """ + Test that deterministic forecast start from Graphical_RVF page matches + CSV start of forecast + """ + cnrfc_id = 'FOLC1' + first_ordinate = cnrfc.get_forecast_meta_deterministic(cnrfc_id, first_ordinate=True)[-1] + df = cnrfc.get_deterministic_forecast(cnrfc_id, truncate_historical=False)['data'] + first_forecast_entry = df['forecast'].dropna().index.tolist()[0] + + # check that the date/time representation in the timestamp and datetime.datetime objects are the 
same + self.assertEqual(first_forecast_entry.year, first_ordinate.year) + self.assertEqual(first_forecast_entry.month, first_ordinate.month) + self.assertEqual(first_forecast_entry.day, first_ordinate.day) + self.assertEqual(first_forecast_entry.hour, first_ordinate.hour) + self.assertEqual(first_forecast_entry.minute, first_ordinate.minute) + + # for now, strip the local tzinfo from `first_ordinate` + self.assertEqual(first_forecast_entry.tzinfo, first_ordinate.replace(tzinfo=None).tzinfo) + + def test_get_deterministic_forecast_watershed(self): + """ + test watershed deterministic forecast download for North San Joaquin on a particular date; + additional future tests to add coverage for arguments: + - watershed + - date_string + - acre_feet=False + - pdt_convert=False + - as_pdt=False + - cnrfc_id=None + """ + df = cnrfc.get_deterministic_forecast_watershed('N_SanJoaquin', '2019040412')['data'] + self.assertEqual(df.head(20)['NHGC1'].values.tolist(), + self.deterministic_frame.head(20)['NHGC1'].values.tolist()) + self.assertIsNone(df.index.tzinfo) + + def test_get_water_year_trend_tabular(self): + """ + test water year trend tabular download for a past year for Folsom reservoir forecast point + """ + df = cnrfc.get_water_year_trend_tabular('FOLC1', '2022')['data'] + self.assertEqual(df.shape, (365, 9)) + + def test_get_seasonal_trend_tabular(self): + """ + test seasonal trend tabular download for a past year for Shasta reservoir forecast point + """ + df = cnrfc.get_seasonal_trend_tabular('SHDC1', 2022)['data'] + self.assertEqual(df.shape, (365, 10)) + + def test_get_ensemble_forecast(self): + """ + test for current ensemble forecast file schema, using Vernalis forecast location + """ + result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', acre_feet=False, pdt_convert=False, as_pdt=False) + self.assertEqual(result['data'].shape, (721, 43)) + self.assertIsNone(result['data'].index.tzinfo) + self.assertEqual(result['info']['watershed'], 'SanJoaquin') + self.assertEqual(result['info']['units'], 'cfs') + + def test_get_ensemble_product_1(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_1, 'ORDC1') + + def test_get_ensemble_product_3(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_3, 'ORDC1') + + def test_get_ensemble_product_5(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_5, 'ORDC1') + + def test_get_ensemble_product_11(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_11, 'ORDC1') + + def test_get_ensemble_product_12(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_12, 'ORDC1') + + def test_get_ensemble_product_13(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an error + """ + self.assertRaises(NotImplementedError, cnrfc.get_ensemble_product_13, 'ORDC1') + + def test_get_data_report_part_8(self): + """ + as this method is not yet implemented in the cnrfc module, it is expected to raise an 
error
+        """
+        self.assertRaises(NotImplementedError, cnrfc.get_data_report_part_8)
+
+    def test_get_monthly_reservoir_storage_summary(self):
+        """
+        as this method is not yet implemented in the cnrfc module, it is expected to raise an error
+        """
+        self.assertRaises(NotImplementedError, cnrfc.get_monthly_reservoir_storage_summary)
+
+    def test_get_rating_curve(self):
+        """
+        example expected output from get_rating_curve method
+        """
+        result = cnrfc.get_rating_curve('DCSC1')
+        self.assertEqual(result['data'][0], (0.92, 0.45))
+        self.assertEqual(result['data'][-1], (15.0, 16300.0))
+        self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js')
+
+    def test_get_watershed(self):
+        """
+        example usage for looking up watershed group by forecast point ID
+        """
+        self.assertEqual(cnrfc.get_watershed('NCOC1'), 'LowerSacramento')
+
+    def test_get_forecast_meta_deterministic(self):
+        """
+        test for predicted response with get_forecast_meta_deterministic for Oroville forecast point
+        """
+        result = cnrfc.get_forecast_meta_deterministic('ORDC1', first_ordinate=False, release=False)
+        self.assertIsInstance(result[0], (dt.date, dt.datetime))
+        self.assertIsInstance(result[1], (dt.date, dt.datetime))
+        self.assertEqual(result[2], 'FEATHER RIVER - LAKE OROVILLE (ORDC1)')
+        self.assertEqual(result[3], 'Impaired Inflows')
+
+    def test_get_ensemble_product_2(self):
+        """
+        test for the expected format of ensemble product #2
+        """
+        result = cnrfc.get_ensemble_product_2('BDBC1')
+        self.assertEqual(result['info']['type'], 'Tabular 10-Day Streamflow Volume Accumulation')
+        self.assertEqual(result['info']['units'], 'TAF')
+        self.assertEqual(result['data'].shape, (6, 10))
+        self.assertEqual(result['data'].index.tolist(),
+                         ['10%', '25%', '50%(Median)', '75%', '90%', 'CNRFCDeterministic Forecast'])
+
+    def test_get_watershed_forecast_issue_time(self):
+        # test for the long range ensemble product
+        self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('daily',
+                                                                      'North Coast',
+                                                                      date_string=None,
+                                                                      deterministic=False), dt.datetime)
+
+        # test for the hourly deterministic product
+        self.assertIsInstance(cnrfc.get_watershed_forecast_issue_time('hourly',
+                                                                      'North Coast',
+                                                                      date_string=None,
+                                                                      deterministic=True), dt.datetime)
+
+        # the value None is returned for a specified forecast issuance in the past
+        self.assertIsNone(cnrfc.get_watershed_forecast_issue_time('daily',
+                                                                  'North Coast',
+                                                                  date_string='2023010112',
+                                                                  deterministic=False))
+
+    def test__default_date_string(self):
+        result = cnrfc.cnrfc._default_date_string(None)
+        result_dt = dt.datetime.strptime(result, '%Y%m%d%H')
+        self.assertIsInstance(result, str)
+        self.assertIsInstance(result_dt, dt.datetime)
+        self.assertIn(result_dt.hour, [0, 6, 12, 18])
+        self.assertEqual(cnrfc.cnrfc._default_date_string('2023112818'), '2023112818')
+        self.assertRaises(ValueError, cnrfc.cnrfc._default_date_string, '2023112805')
+
+    def test_get_ensemble_product_url(self):
+        self.assertEqual(cnrfc.get_ensemble_product_url(1, 'VNSC1', data_format=''),
+                         'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=1')
+        self.assertEqual(cnrfc.get_ensemble_product_url(3, 'VNSC1', data_format=''),
+                         'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=VNSC1&prodID=3')
+        self.assertEqual(cnrfc.get_ensemble_product_url(7, 'SHDC1', data_format='Tabular'),
+                         'https://www.cnrfc.noaa.gov/ensembleProductTabular.php?id=SHDC1&prodID=7')
+
+    def test_get_ensemble_product_6(self):
+        """
+        test download and parsing of monthly probability rainbow barchart plot for Shasta location
+        """
+        result = cnrfc.get_ensemble_product_6('SHDC1')
+        self.assertEqual(result['data'].shape, (7, 12))
+        self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%', '75%', '90%', 'Mean', '%Mean'])
+        self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=6')
+        self.assertEqual(result['info']['type'], 'Monthly Streamflow Volume (1000s of Acre-Feet)')
+        self.assertEqual(result['info']['units'], 'TAF')
+
+    def test_get_ensemble_product_10(self):
+        """
+        test download and parsing of water year accumulated volume plot for Shasta location
+        """
+        result = cnrfc.get_ensemble_product_10('SHDC1')
+        self.assertEqual(result['data'].shape, (5, 12))
+        self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%'])
+        self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=10')
+        self.assertEqual(result['info']['type'], 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation')
+        self.assertEqual(result['info']['units'], 'TAF')
+
+    def test__parse_blue_table(self):
+        """
+        test the processing of included data table for monthly summary associated with ensemble products like 2, 10, etc
+        """
+        table_soup = BeautifulSoup(textwrap.dedent("""\
+            <table>
+                <tr><td>Title</td></tr>
+                <tr><td>Probability</td><td>Nov<br/>29</td><td>Nov<br/>30</td><td>Dec<br/>01</td><td>Dec<br/>02</td><td>Dec<br/>03</td></tr>
+                <tr><td>10%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>49.0</td><td>62.5</td></tr>
+                <tr><td>25%</td><td>12.2</td><td>24.4</td><td>36.7</td><td>48.9</td><td>61.4</td></tr>
+                <tr><td>50%<br/>(Median)</td><td>12.2</td><td>24.4</td><td>36.6</td><td>48.9</td><td>61.2</td></tr>
+            </table>
+ """), 'lxml') + result = cnrfc.cnrfc._parse_blue_table(table_soup) + self.assertIsInstance(result[0], pd.DataFrame) + self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) + self.assertIsInstance(result[1], list) def test__apply_conversions(self): """ @@ -413,115 +404,115 @@ def test__apply_conversions(self): self.assertEqual(result[0].first_valid_index().to_pydatetime(), expected_dt) self.assertEqual(result[1], 'cfs') - # def test_get_ensemble_forecast_watershed(self): - # """ - # test for retrieiving an ensemble forecast watershed file for a forecast issuance prior to most recent - # """ - # result = cnrfc.get_ensemble_forecast_watershed('SalinasPajaro', - # 'hourly', - # '2023010118', - # acre_feet=False, - # pdt_convert=False, - # as_pdt=False, - # cnrfc_id=None) - # self.assertEqual(result['data'].shape, (721, 924)) - # self.assertEqual(result['data'].tail(1)['BTEC1'].values[0], 226.94) - # self.assertEqual(pd.to_datetime(result['data'].last_valid_index()), dt.datetime(2023, 1, 31, 18, 0, 0)) - # self.assertEqual(result['info']['watershed'], 'SalinasPajaro') - # self.assertEqual(result['info']['url'], - # 'https://www.cnrfc.noaa.gov/csv/2023010118_SalinasPajaro_hefs_csv_hourly.zip') - # self.assertIsNone(result['info']['issue_time']) - - # def test_get_esp_trace_analysis_url(self): - # """ - # test that the build-your-own trace analysis product url is properly constructed for the provided options - # """ - # url = cnrfc.get_esp_trace_analysis_url('BTYO3', - # interval='day', - # value_type='mean', - # plot_type='traces', - # table_type='forecastInfo', - # product_type='table', - # date_string='20231106', - # end_date_string='20231231') - # expected_url = '&'.join(['https://www.cnrfc.noaa.gov/ensembleProduct.php?id=BTYO3', - # 'prodID=8', - # 'interval=day', - # 'valueType=mean', - # 'plotType=traces', - # 'tableType=forecastInfo', - # 'productType=table', - # 'dateSelection=custom', - # 'date=20231106', - # 'endDate=20231231']) - # self.maxDiff = 800 - # self.assertEqual(url, expected_url) - - # def test_get_ensemble_first_forecast_ordinate(self): - # """ - # test that the first ensemble forecast ordinate is a datetime in the past - # """ - # result = cnrfc.get_ensemble_first_forecast_ordinate( - # url='https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv', - # df=None - # ) - # self.assertIsInstance(result, dt.datetime) - # result_utc = result.replace(tzinfo=tz_function('UTC')) - # self.assertLess(result_utc, dt.datetime.now(tz=tz_function('UTC'))) - - # def test__get_forecast_csv(self): - # """ - # test for forecast CSV data retrieval to in-memory filelike object (private method) - # """ - # result = cnrfc.cnrfc._get_forecast_csv('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') - # self.assertIsInstance(result, io.BytesIO) - - # # check first line contains forecast point headers - # self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) - - # # check second line contains variables identifiers - # self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - - # # check third line contains expected timeseries info - # self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) - - # def test_get_forecast_csvdata(self): - # """ - # test for forecast CSV data retrieval to in-memory filelike object (public method); duplicate of - # test__get_forecast_csv - # """ - # result = cnrfc.get_forecast_csvdata('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') - # 
self.assertIsInstance(result, io.BytesIO) - # self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) - # self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - # self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) - - # def test__get_cnrfc_restricted_content(self): - # """ - # test that restricted content can be accessed through the provided credentials - # """ - # result = cnrfc.cnrfc._get_cnrfc_restricted_content( - # 'https://www.cnrfc.noaa.gov/restricted/graphicalRVF_tabular.php?id=FOLC1' - # ) - # sample = BeautifulSoup(result, 'lxml').find('pre').text.splitlines()[:9] - # self.assertEqual(sample[2], '# Location: American River - Folsom Lake (FOLC1)') - # self.assertTrue(sample[-1].startswith('# Maximum Observed Flow:')) - - # def test_download_watershed_file(self): - # """ - # test for downloading watershed file to local file system (in this case, downloaded to in-memory object) - # """ - # result = cnrfc.download_watershed_file('WSI', '2023010112', 'ensemble', duration='daily', path=io.BytesIO()) - # self.assertIsInstance(result, io.BytesIO) - - # # check first line contains forecast point headers - # self.assertTrue(result.readline().decode('utf-8').startswith('GMT,SACC0')) - - # # check second line contains variables identifiers - # self.assertTrue(result.readline().decode('utf-8').startswith(',SQME,SQME')) - - # # check third line contains expected timeseries info - # self.assertTrue(result.readline().decode('utf-8').startswith('2023-01-01 12:00:00,252.83904,')) + def test_get_ensemble_forecast_watershed(self): + """ + test for retrieiving an ensemble forecast watershed file for a forecast issuance prior to most recent + """ + result = cnrfc.get_ensemble_forecast_watershed('SalinasPajaro', + 'hourly', + '2023010118', + acre_feet=False, + pdt_convert=False, + as_pdt=False, + cnrfc_id=None) + self.assertEqual(result['data'].shape, (721, 924)) + self.assertEqual(result['data'].tail(1)['BTEC1'].values[0], 226.94) + self.assertEqual(pd.to_datetime(result['data'].last_valid_index()), dt.datetime(2023, 1, 31, 18, 0, 0)) + self.assertEqual(result['info']['watershed'], 'SalinasPajaro') + self.assertEqual(result['info']['url'], + 'https://www.cnrfc.noaa.gov/csv/2023010118_SalinasPajaro_hefs_csv_hourly.zip') + self.assertIsNone(result['info']['issue_time']) + + def test_get_esp_trace_analysis_url(self): + """ + test that the build-your-own trace analysis product url is properly constructed for the provided options + """ + url = cnrfc.get_esp_trace_analysis_url('BTYO3', + interval='day', + value_type='mean', + plot_type='traces', + table_type='forecastInfo', + product_type='table', + date_string='20231106', + end_date_string='20231231') + expected_url = '&'.join(['https://www.cnrfc.noaa.gov/ensembleProduct.php?id=BTYO3', + 'prodID=8', + 'interval=day', + 'valueType=mean', + 'plotType=traces', + 'tableType=forecastInfo', + 'productType=table', + 'dateSelection=custom', + 'date=20231106', + 'endDate=20231231']) + self.maxDiff = 800 + self.assertEqual(url, expected_url) + + def test_get_ensemble_first_forecast_ordinate(self): + """ + test that the first ensemble forecast ordinate is a datetime in the past + """ + result = cnrfc.get_ensemble_first_forecast_ordinate( + url='https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv', + df=None + ) + self.assertIsInstance(result, dt.datetime) + result_utc = utils.get_localized_datetime(result, 'UTC') + self.assertLess(result_utc, 
utils.get_localized_datetime(dt.datetime.now(), 'UTC')) + + def test__get_forecast_csv(self): + """ + test for forecast CSV data retrieval to in-memory filelike object (private method) + """ + result = cnrfc.cnrfc._get_forecast_csv('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') + self.assertIsInstance(result, io.BytesIO) + + # check first line contains forecast point headers + self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) + + # check second line contains variables identifiers + self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) + + # check third line contains expected timeseries info + self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + def test_get_forecast_csvdata(self): + """ + test for forecast CSV data retrieval to in-memory filelike object (public method); duplicate of + test__get_forecast_csv + """ + result = cnrfc.get_forecast_csvdata('https://www.cnrfc.noaa.gov/csv/HLEC1_hefs_csv_hourly.csv') + self.assertIsInstance(result, io.BytesIO) + self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) + self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) + self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + def test__get_cnrfc_restricted_content(self): + """ + test that restricted content can be accessed through the provided credentials + """ + result = cnrfc.cnrfc._get_cnrfc_restricted_content( + 'https://www.cnrfc.noaa.gov/restricted/graphicalRVF_tabular.php?id=FOLC1' + ) + sample = BeautifulSoup(result, 'lxml').find('pre').text.splitlines()[:9] + self.assertEqual(sample[2], '# Location: American River - Folsom Lake (FOLC1)') + self.assertTrue(sample[-1].startswith('# Maximum Observed Flow:')) + + def test_download_watershed_file(self): + """ + test for downloading watershed file to local file system (in this case, downloaded to in-memory object) + """ + result = cnrfc.download_watershed_file('WSI', '2023010112', 'ensemble', duration='daily', path=io.BytesIO()) + self.assertIsInstance(result, io.BytesIO) + + # check first line contains forecast point headers + self.assertTrue(result.readline().decode('utf-8').startswith('GMT,SACC0')) + + # check second line contains variables identifiers + self.assertTrue(result.readline().decode('utf-8').startswith(',SQME,SQME')) + + # check third line contains expected timeseries info + self.assertTrue(result.readline().decode('utf-8').startswith('2023-01-01 12:00:00,252.83904,')) if __name__ == '__main__': diff --git a/collect/tests/test_nid.py b/collect/tests/test_nid.py index 9051c59..0d195fb 100644 --- a/collect/tests/test_nid.py +++ b/collect/tests/test_nid.py @@ -8,7 +8,6 @@ import io import textwrap import unittest -import unittest.mock import pandas as pd from collect import nid diff --git a/collect/tests/test_usace.py b/collect/tests/test_usace.py index 6e21411..aa121ae 100644 --- a/collect/tests/test_usace.py +++ b/collect/tests/test_usace.py @@ -6,7 +6,6 @@ # -*- coding: utf-8 -*- import datetime as dt import unittest -import unittest.mock from collect.usace import wcds diff --git a/collect/tests/test_usgs.py b/collect/tests/test_usgs.py index c2469f2..5ac6096 100644 --- a/collect/tests/test_usgs.py +++ b/collect/tests/test_usgs.py @@ -6,7 +6,6 @@ # -*- coding: utf-8 -*- import datetime as dt import unittest -import unittest.mock from collect import usgs diff --git a/collect/tests/test_utils.py b/collect/tests/test_utils.py index 
ca6998d..25830cc 100644 --- a/collect/tests/test_utils.py +++ b/collect/tests/test_utils.py @@ -5,11 +5,7 @@ """ # -*- coding: utf-8 -*- import datetime as dt -import io -import os -import textwrap import unittest -import unittest.mock import pandas as pd import requests from collect import utils From ef1fec6c33eb1cf0a94abbee29cd0514a2bd25f5 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 16:44:36 -0800 Subject: [PATCH 19/36] Updates for dwr.swp module compatibility with Python 3.11 and linting --- collect/dwr/swp.py | 94 +++++++++++++++++++++++---------------- collect/tests/test_dwr.py | 92 +++++++++++++++++++++++++++++++------- 2 files changed, 130 insertions(+), 56 deletions(-) diff --git a/collect/dwr/swp.py b/collect/dwr/swp.py index 43c9e6c..e38ed63 100644 --- a/collect/dwr/swp.py +++ b/collect/dwr/swp.py @@ -9,14 +9,13 @@ import re import pandas as pd -import requests +from collect import utils -def prompt_installation_and_exit(): - try: - import pdftotext - except: - print('Module pdftotext is required for SWP report collection. Install with `pip install pdftotext==2.2.2`') +try: + import pdftotext +except: + print('Module pdftotext is required for SWP report collection. Install with `pip install pdftotext==2.2.2`') exit() @@ -29,8 +28,22 @@ def get_report_catalog(console=True): Returns: catalog (dict): nested dictionary of report names and associated URLs """ - oco_url_base = 'https://water.ca.gov/-/media/DWR-Website/Web-Pages/Programs/State-Water-Project/Operations-And-Maintenance/Files/Operations-Control-Office/' - cnra_url_base = 'https://data.cnra.ca.gov/dataset/742110dc-0d96-40bc-8e4e-f3594c6c4fe4/resource/45c01d10-4da2-4ebb-8927-367b3bb1e601/download/' + oco_url_base = '/'.join(['https://water.ca.gov/-/media', + 'DWR-Website', + 'Web-Pages', + 'Programs', + 'State-Water-Project', + 'Operations-And-Maintenance', + 'Files', + 'Operations-Control-Office', + '']) + cnra_url_base = '/'.join(['https://data.cnra.ca.gov', + 'dataset', + '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', + 'resource', + '45c01d10-4da2-4ebb-8927-367b3bb1e601', + 'download', + '']) catalog = { 'Dispatcher\'s Daily Water Reports': { @@ -43,15 +56,20 @@ def get_report_catalog(console=True): 'Sun': f'{cnra_url_base}dispatchers-sunday-water-report.txt', }, 'Delta Status and Operations': { - 'Delta Operations Summary (daily)': f'{oco_url_base}Delta-Status-And-Operations/Delta-Operations-Daily-Summary.pdf', - 'Water Quality Summary (daily)': f'{oco_url_base}Delta-Status-And-Operations/Delta-Water-Quality-Daily-Summary.pdf', - 'Hydrologic Conditions Summary (daily)': f'{oco_url_base}Delta-Status-And-Operations/Delta-Hydrologic-Conditions-Daily-Summary.pdf', - 'Miscellaneous Monitoring Data (daily)': f'{oco_url_base}Delta-Status-And-Operations/Delta-Miscellaneous-Daily-Monitoring-Data.pdf', + 'Delta Operations Summary (daily)': + f'{oco_url_base}Delta-Status-And-Operations/Delta-Operations-Daily-Summary.pdf', + 'Water Quality Summary (daily)': + f'{oco_url_base}Delta-Status-And-Operations/Delta-Water-Quality-Daily-Summary.pdf', + 'Hydrologic Conditions Summary (daily)': + f'{oco_url_base}Delta-Status-And-Operations/Delta-Hydrologic-Conditions-Daily-Summary.pdf', + 'Miscellaneous Monitoring Data (daily)': + f'{oco_url_base}Delta-Status-And-Operations/Delta-Miscellaneous-Daily-Monitoring-Data.pdf', 'Barker Slough Flows (weekly)': f'{oco_url_base}Delta-Status-And-Operations/Barker-Slough-Weekly-Flows.pdf' }, 'Oroville Operations': { 'Forecasted Storage': 
f'{oco_url_base}Oroville-Operations/Oroville-Forecasted-Storage.pdf', - 'Hatchery and Robinson Riffle Daily Average Water Temperature': f'{oco_url_base}Oroville-Operations/Hatchery-and-Robinson-Riffle-Daily-Average-Water-Temperature.pdf', + 'Hatchery and Robinson Riffle Daily Average Water Temperature': + f'{oco_url_base}Oroville-Operations/Hatchery-and-Robinson-Riffle-Daily-Average-Water-Temperature.pdf', }, 'Weekly Reservoir Storage Charts': { 'Oroville': f'{oco_url_base}Project-Wide-Operations/Oroville-Weekly-Reservoir-Storage-Chart.pdf', @@ -86,10 +104,6 @@ def get_report_url(report): Returns: url (str): the path to the PDF report """ - url_base = 'https://water.ca.gov/-/media/DWR-Website/Web-Pages/Programs/State-Water-Project/' - swp_base = url_base + 'Operations-And-Maintenance/Files/Operations-Control-Office/Delta-Status-And-Operations' - url = '/'.join([swp_base, 'Delta-Operations-Daily-Summary.pdf']) - # flatten the catalog flat = {k: v for d in get_report_catalog(console=False).values() for k, v in d.items() } @@ -99,21 +113,26 @@ def get_report_url(report): def get_raw_text(report, filename=None, preserve_white_space=True): """ + extract text data from a PDF report on the SWP website + Arguments: filename (str): optional filename (.txt) for raw report export Returns: content (str): the string contents of the PDF (preserves whitespace) + Raises: + ValueError: if the specified report does not map to a PDF, raise a ValueError """ # construct URL url = get_report_url(report) + if not url.endswith('.pdf'): + raise ValueError(f'ERROR: {report} is not PDF-formatted') + # request report content from URL - f = io.BytesIO(requests.get(url).content) - f.seek(0) + with io.BytesIO(utils.get_session_response(url).content) as buf: - # parse PDF and extract as string - prompt_installation_and_exit() - content = pdftotext.PDF(f, raw=False, physical=True)[0] + # parse PDF and extract as string + content = pdftotext.PDF(buf, raw=False, physical=True)[0] # optionally export the raw report as text if filename: @@ -122,7 +141,7 @@ def get_raw_text(report, filename=None, preserve_white_space=True): # optionally strip out indentation and excess white space from text if not preserve_white_space: content = '\n'.join([str(x).strip() for x in content.splitlines() if bool(x.strip().lstrip('~'))]) - + # write to file f.write(content) @@ -133,7 +152,7 @@ def get_raw_text(report, filename=None, preserve_white_space=True): def get_delta_daily_data(export_as='dict'): """ fetch and return SWP OCO's daily delta operations report - + Arguments: export_as (str): designates which format to use for returned data Returns: @@ -142,7 +161,7 @@ def get_delta_daily_data(export_as='dict'): content = get_raw_text('Delta Operations Summary (daily)', 'raw_export.txt') # extract current report's date - rx = re.compile(r'(?P\d{1,2}/\d{1,2}/\d{4})') + rx = re.compile(r'(?P\d{1,2}/\d{1,2}/\d{4})') date = rx.search(content).group('date') # parse the report date @@ -182,9 +201,9 @@ def _parse_entry(match): # structured dictionary template organizes the categories of the report result = { - # 'date': date_reformat, + # 'date': date_reformat, 'Scheduled Exports for Today': { - 'Clifton Court Inflow': [], + 'Clifton Court Inflow': [], 'Jones Pumping Plant': [] }, 'Estimated Delta Hydrology': { @@ -236,7 +255,7 @@ def _parse_entry(match): def get_barker_slough_data(): """ fetch and return SWP OCO's Barker Slough Flows (weekly) report - + Arguments: report (str): designates which report to retrieve Returns: @@ -245,7 +264,7 @@ def 
get_barker_slough_data(): content = get_raw_text('Barker Slough Flows (weekly)') # report information - meta = { + meta = { 'filename': 'Barker-Slough-Weekly-Flows.pdf', 'title': content.splitlines()[0], 'contact': 'OCO_Export_Management@water.ca.gov', @@ -253,7 +272,7 @@ def get_barker_slough_data(): 'raw': content, } - # strip leading white space, filter out empty rows, and split rows + # strip leading white space, filter out empty rows, and split rows # based variable # of whitespace characters (2 or more) rows = [re.split(r'\s{2,}', x.lstrip()) for x in content.splitlines() if bool(x)] @@ -262,7 +281,7 @@ def get_barker_slough_data(): df = pd.DataFrame(rows[3:], columns=rows[2]) df.set_index('Date', drop=True, inplace=True) df.index = pd.to_datetime(df.index) - + # return result return {'info': meta, 'data': df} @@ -274,7 +293,6 @@ def get_oco_tabular_data(report): Arguments: report (str): designates which report to retrieve - filename (str): optional filename (.txt) for raw report export Returns: content (str): the string contents of the PDF (preserves whitespace) """ @@ -282,12 +300,10 @@ def get_oco_tabular_data(report): url = get_report_url(report) # request report content from URL - f = io.BytesIO(requests.get(url).content) - f.seek(0) + with io.BytesIO(utils.get_session_response(url).content) as buf: - # parse PDF and extract as string - prompt_installation_and_exit() - content = list(pdftotext.PDF(f, raw=False, physical=True)) + # parse PDF and extract as string + content = list(pdftotext.PDF(buf, raw=False, physical=True)) # report information meta = { @@ -307,7 +323,7 @@ def _process_page(i, page, report): Returns: df (pandas.DataFrame): tabular results as dataframe """ - # strip leading white space, filter out empty rows, and split rows + # strip leading white space, filter out empty rows, and split rows # based variable # of whitespace characters (2 or more) page = page.replace(',', '') rows = [re.split(r'\s{2,}', x.lstrip()) @@ -354,7 +370,7 @@ def _process_page(i, page, report): rows[4] = [''] + rows[4] rows[6] = ['Date (30 days)'] + rows[6] df.columns = [' '.join(list(x)).strip() for x in zip(*[rows[2], rows[4], rows[6]])] - + # page 2 of hydrology report elif i == 1: rows[2] = [''] + rows[2] diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index 4e26b4d..839186b 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -154,25 +154,83 @@ def deferred_test_get_daily_snowpack_data(self): class TestSWP(unittest.TestCase): def deferred_test_prompt_installation_and_exit(self): + """ + test to ensure appropriate warning is printed when pdftotext is not installed; not yet implemented + """ swp.prompt_installation_and_exit() - def deferred_test_get_report_catalog(self): - swp.get_report_catalog() - - def deferred_test_get_report_url(self): - swp.get_report_url() - - def deferred_test_get_raw_text(self): - swp.get_raw_text() - - def deferred_test_get_delta_daily_data(self): - swp.get_delta_daily_data() - - def deferred_test_get_barker_slough_data(self): - swp.get_barker_slough_data() - - def deferred_test_get_oco_tabular_data(self): - swp.get_oco_tabular_data() + def test_get_report_catalog(self): + """ + test the default message behavior for get_report_catalog + """ + result = swp.get_report_catalog(console=False) + self.assertTrue('Oroville Operations' in result) + self.assertTrue('Weekly Summaries' in result) + + def test_get_report_url(self): + """ + verify get_report_url produces the expected URL formats + """ + # check one of the reservoir 
PDF reports + expected_url = '/'.join(['https://water.ca.gov/-/media', + 'DWR-Website', + 'Web-Pages', + 'Programs', + 'State-Water-Project', + 'Operations-And-Maintenance', + 'Files', + 'Operations-Control-Office', + 'Project-Wide-Operations', + 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) + self.assertEqual(swp.get_report_url('Oroville'), expected_url) + + # check one of the txt-formatted reports + expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', + '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', + 'resource', + '45c01d10-4da2-4ebb-8927-367b3bb1e601', + 'download', + 'dispatchers-monday-water-report.txt']) + self.assertEqual(swp.get_report_url('Mon'), expected_url) + + # check for invalid input + self.assertIsNone(swp.get_report_url('invalid')) + + def test_get_raw_text(self): + """ + test expected behavior for get_raw_text for pdf report and invalid text report + """ + # test for a PDF-formatted report + result = swp.get_raw_text('Delta Operations Summary (daily)') + self.assertIsInstance(result, str) + self.assertTrue(result.startswith('PRELIMINARY DATA')) + self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov')) + + # test for a text-formatted report + self.assertRaises(ValueError, swp.get_raw_text, 'Mon') + + def test_get_delta_daily_data(self): + result = swp.get_delta_daily_data('dict') + self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON ')) + self.assertIsInstance(result['data'], dict) + self.assertTrue('Reservoir Releases' in result['data']) + + def test_get_barker_slough_data(self): + result = swp.get_barker_slough_data() + self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT') + self.assertEqual(result['data'].shape, (7, 3)) + self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex) + + def test_get_oco_tabular_data(self): + """ + test tabular data extraction for the Water Quality Summary report using get_oco_tabular_data + """ + result = swp.get_oco_tabular_data('Water Quality Summary (daily)') + self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf') + self.assertIsInstance(result['info']['pages'], int) + self.assertIsInstance(result['data'], pd.DataFrame) + self.assertEqual(result['data'].shape, (30, 46)) + self.assertEqual(result['data'].index.name, 'Date (30 days)') class TestWSI(unittest.TestCase): From 9fe8ffc95e2f0e30f492d37020e5446cb2c935fc Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 17:36:25 -0800 Subject: [PATCH 20/36] Add B120 initial tests and related changes in dwr.b120 --- collect/dwr/b120.py | 70 ++++---- collect/tests/test_dwr.py | 361 ++++++++++++++++++++++---------------- 2 files changed, 239 insertions(+), 192 deletions(-) diff --git a/collect/dwr/b120.py b/collect/dwr/b120.py index 72ada3f..d56c504 100644 --- a/collect/dwr/b120.py +++ b/collect/dwr/b120.py @@ -12,12 +12,12 @@ import requests from collect.dwr import errors -from collect.utils.utils import get_web_status, clean_fixed_width_headers +from collect.utils import utils # TODO - add support for historical reports in format: -# http://cdec.water.ca.gov/reportapp/javareports?name=B120.201203 -# http://cdec.water.ca.gov/reportapp/javareports?name=B120.201802 +# https://cdec.water.ca.gov/reportapp/javareports?name=B120.201203 +# https://cdec.water.ca.gov/reportapp/javareports?name=B120.201802 # TODO - check updated homepage for bulletin 120 for new links # https://cdec.water.ca.gov/snow/bulletin120/index2.html @@ 
-33,18 +33,19 @@ def get_b120_data(date_suffix=''): Args: date_suffix (str): Returns: - + (dict): dictionary of extracted data and metadata (info) + Raises: + collect.dwr.errors.B120SourceError: raised when the specified date is outside of the range available as HTML products """ - if validate_date_suffix(date_suffix, min_year=2017): # main B120 page (new DWR format) - url = 'http://cdec.water.ca.gov/b120{}.html'.format(date_suffix) + url = 'https://cdec.water.ca.gov/b120{}.html'.format(date_suffix) # parse HTML file structure; AJ forecast table - soup = BeautifulSoup(requests.get(url).content, 'html5lib') + soup = BeautifulSoup(utils.get_session_response(url).content, 'html5lib') table = soup.find('table', {'class': 'doc-aj-table'}) - + # read HTML table with April-July Forecast Summary (TAF) aj_list = [] for tr in table.find('tbody').find_all('tr'): @@ -98,26 +99,22 @@ def get_b120_data(date_suffix=''): def validate_date_suffix(date_suffix, min_year=2017): """ - min year is 2017 for HTML-formatted report at - https://cdec.water.ca.gov/b120_YYYYMM.html - min year is 2011 for text-formatted report at - http://cdec.water.ca.gov/reportapp/javareports?name=B120.YYYYMM + min year is 2017 for HTML-formatted report at https://cdec.water.ca.gov/b120_YYYYMM.html + min year is 2011 for text-formatted report at https://cdec.water.ca.gov/reportapp/javareports?name=B120.YYYYMM Args: - date_suffix (str): - min_year (int): + date_suffix (str): string date suffix representing year and month (_YYYYMM) + min_year (int): the minimum year for valid date suffix format Returns: - + (bool): flag to indicate whether provided date_suffix is valid """ - if date_suffix == '': return True elif dt.datetime.strptime(date_suffix, '_%Y%m') >= dt.datetime(min_year, 2, 1): return True - - else: - return False + + return False def clean_td(text): @@ -152,7 +149,7 @@ def get_b120_update_data(date_suffix=''): raise errors.B120SourceError('B120 updates in this format not available before Feb. 2018.') # parse HTML file structure; AJ forecast table - soup = BeautifulSoup(requests.get(url).content, 'lxml') + soup = BeautifulSoup(utils.get_session_response(url).content, 'lxml') tables = soup.find_all('table', {'class': 'doc-aj-table'}) # unused header info @@ -203,24 +200,22 @@ def get_b120_update_data(date_suffix=''): def get_120_archived_reports(year, month): """ Text-formatted reports available through CDEC javareports app for 2011-2017 - http://cdec.water.ca.gov/reportapp/javareports?name=B120.YYYYMM + https://cdec.water.ca.gov/reportapp/javareports?name=B120.YYYYMM Args: - year (int): - month (int): - + year (int): the year as 4-digit integer + month (int): the month as integer from 1 to 12 Returns: - (dict) + (dict): nested dictionary with two result dataframes and metadata """ - report_date = dt.datetime(year, month, 1) if not validate_date_suffix(report_date.strftime('_%Y%m'), min_year=2011): raise errors.B120SourceError('B120 Issuances before Feb. 
2011 are avilable as PDF.') - url = f'http://cdec.water.ca.gov/reportapp/javareports?name=B120.{report_date:%Y%m}' + url = f'https://cdec.water.ca.gov/reportapp/javareports?name=B120.{report_date:%Y%m}' - result = requests.get(url).content + result = utils.get_session_response(url).content result = BeautifulSoup(result, 'lxml').find('pre').text tables = result.split('Water-Year (WY) Forecast and Monthly Distribution') @@ -247,17 +242,13 @@ def get_120_archived_reports(year, month): buf.seek(0) # parse fixed-width file - wy_df = pd.read_fwf(buf, header=[1, 2, 3], skiprows=[4,]) - wy_df.dropna(inplace=True) + wy_df = pd.read_fwf(buf, header=None, skiprows=[0, 1, 2, 3, 4]) - # clean columns - headers = clean_fixed_width_headers(wy_df.columns) - headers[0] = 'Hydrologic Region' - wy_df.columns = headers - - # separate 80% probability range - wy_df['90% Exceedance'] = wy_df['80% Probability Range'].str.split('-', expand=True)[0] - wy_df['10% Exceedance'] = wy_df['80% Probability Range'].str.split('-', expand=True)[1] + # assign column names + wy_df.columns = ['Hydrologic Region', 'Oct thru Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', + 'Aug', 'Sep', 'Water Year', '90% Exceedance', 'to', '10% Exceedance', 'WY % Avg'] + wy_df.dropna(inplace=True) + wy_df.drop('to', axis=1, inplace=True) # caption caption = [] @@ -269,7 +260,7 @@ def get_120_archived_reports(year, month): for line in result.split('Notes:')[1].split('For more information')[0].replace(u'\xa0', ' ').split('\r\n')[1:]: if bool(line.strip()): notes.append(line.strip()) - notes = [x+'.' for x in ''.join(notes).split('.')] + notes = [x + '.' for x in ''.join(notes).split('.')] info = { 'url': url, @@ -297,4 +288,3 @@ def april_july_dataframe(data_list): columns = ['Hydrologic Region', 'Watershed', 'Apr-Jul Forecast', '% of Avg', '90% Exceedance', '10% Exceedance'] df = pd.DataFrame(data_list, columns=columns) return df - diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index 839186b..ec765ad 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -20,15 +20,10 @@ from collect.dwr import swp -class TestB120(unittest.TestCase): - pass - - class TestCASGEM(unittest.TestCase): """ dwr.casgem module references inactive API; CASGEM tools must be updated once CNRA completes web transition """ - def deferred_test_get_casgem_data(self): return @@ -70,171 +65,233 @@ class TestCAWDL(unittest.TestCase): dwr.cawdl module references inactive API; CAWDL tools must be updated once CNRA/DWR completes web transition """ def deferred_test_get_cawdl_data(self): - cawdl.get_cawdl_data('17202') + result = cawdl.get_cawdl_data('17202') def deferred_test_get_cawdl_surface_water_data(self): - cawdl.get_cawdl_surface_water_data('17202', 2021, 'FLOW', interval='DAILY_MEAN') + result = cawdl.get_cawdl_surface_water_data('17202', 2021, 'FLOW', interval='DAILY_MEAN') def deferred_test_get_cawdl_surface_water_por(self): - cawdl.get_cawdl_surface_water_por('17202', 'FLOW', interval='DAILY_MEAN') + result = cawdl.get_cawdl_surface_water_por('17202', 'FLOW', interval='DAILY_MEAN') def deferred_test_get_cawdl_surface_water_site_report(self): - cawdl.get_cawdl_surface_water_site_report('17202') + result = cawdl.get_cawdl_surface_water_site_report('17202') class TestCDEC(unittest.TestCase): - def deferred_test_get_b120_data(self): - b120.get_b120_data(date_suffix='') - - def deferred_test_validate_date_suffix(self): - b120.validate_date_suffix(date_suffix, min_year=2017) - - def deferred_test_clean_td(self): - b120.clean_td(text) - - 
def deferred_test_get_b120_update_data(self): - b120.get_b120_update_data(date_suffix='') - - def deferred_test_get_120_archived_reports(self): - b120.get_120_archived_reports(year, month) - - def deferred_test_april_july_dataframe(self): - b120.april_july_dataframe(data_list) - - def deferred_test_get_station_url(self): - cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') - - def deferred_test_get_station_sensors(self): - cdec.get_station_sensors(station, start, end) - - def deferred_test_get_station_data(self): - cdec.get_station_data(station, start, end, sensors=[], duration='') - - def deferred_test_get_raw_station_csv(self): - cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') - - def deferred_test_get_raw_station_json(self): - cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') - - def deferred_test_get_sensor_frame(self): - cdec.get_sensor_frame(station, start, end, sensor='', duration='') - - def deferred_test_get_station_metadata(self): - cdec.get_station_metadata(station, as_geojson=False) - - def deferred_test_get_dam_metadata(self): - cdec.get_dam_metadata(station) - - def deferred_test_get_reservoir_metadata(self): - cdec.get_reservoir_metadata(station) - - def deferred_test__get_table_index(self): - cdec._get_table_index(table_type, tables) - - def deferred_test__parse_station_generic_table(self): - cdec._parse_station_generic_table(table) - - def deferred_test__parse_station_sensors_table(self): - cdec._parse_station_sensors_table(table) - - def deferred_test__parse_station_comments_table(self): - cdec._parse_station_comments_table(table) - - def deferred_test__parse_data_available(self): - cdec._parse_data_available(text) - - def deferred_test_get_data(self): - cdec.get_data(station, start, end, sensor='', duration='') - - def deferred_test_get_daily_snowpack_data(self): - cdec.get_daily_snowpack_data(region, start, end) - - -class TestSWP(unittest.TestCase): - - def deferred_test_prompt_installation_and_exit(self): - """ - test to ensure appropriate warning is printed when pdftotext is not installed; not yet implemented + def test_get_b120_data(self): """ - swp.prompt_installation_and_exit() - - def test_get_report_catalog(self): + test for B120 data-retrieval function relying on https://cdec.water.ca.gov/b120.html """ - test the default message behavior for get_report_catalog - """ - result = swp.get_report_catalog(console=False) - self.assertTrue('Oroville Operations' in result) - self.assertTrue('Weekly Summaries' in result) - - def test_get_report_url(self): + result = b120.get_b120_data(date_suffix='') + self.assertEqual(result['info']['title'], 'B-120 Water Supply Forecast Summary (posted on 05/07/20 16:31)') + self.assertEqual(result['info']['url'], 'https://cdec.water.ca.gov/b120.html') + self.assertEqual(result['info']['type'], 'B120 Forecast') + self.assertEqual(result['data']['Apr-Jul'].shape, (26, 6)) + self.assertEqual(result['data']['WY'].shape, (16, 14)) + + def test_validate_date_suffix(self): """ - verify get_report_url produces the expected URL formats + check the behavior of the validate_date_suffix method """ - # check one of the reservoir PDF reports - expected_url = '/'.join(['https://water.ca.gov/-/media', - 'DWR-Website', - 'Web-Pages', - 'Programs', - 'State-Water-Project', - 'Operations-And-Maintenance', - 'Files', - 'Operations-Control-Office', - 'Project-Wide-Operations', - 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) - 
self.assertEqual(swp.get_report_url('Oroville'), expected_url) - - # check one of the txt-formatted reports - expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', - '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', - 'resource', - '45c01d10-4da2-4ebb-8927-367b3bb1e601', - 'download', - 'dispatchers-monday-water-report.txt']) - self.assertEqual(swp.get_report_url('Mon'), expected_url) - - # check for invalid input - self.assertIsNone(swp.get_report_url('invalid')) - - def test_get_raw_text(self): + self.assertTrue(b120.validate_date_suffix('')) + self.assertTrue(b120.validate_date_suffix('_201804', min_year=2017)) + self.assertFalse(b120.validate_date_suffix('_201105', min_year=2017)) + + def test_clean_td(self): """ - test expected behavior for get_raw_text for pdf report and invalid text report + test to strip specified characters from text and convert to float or None, where applicable """ - # test for a PDF-formatted report - result = swp.get_raw_text('Delta Operations Summary (daily)') - self.assertIsInstance(result, str) - self.assertTrue(result.startswith('PRELIMINARY DATA')) - self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov')) - - # test for a text-formatted report - self.assertRaises(ValueError, swp.get_raw_text, 'Mon') - - def test_get_delta_daily_data(self): - result = swp.get_delta_daily_data('dict') - self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON ')) - self.assertIsInstance(result['data'], dict) - self.assertTrue('Reservoir Releases' in result['data']) - - def test_get_barker_slough_data(self): - result = swp.get_barker_slough_data() - self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT') - self.assertEqual(result['data'].shape, (7, 3)) - self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex) - - def test_get_oco_tabular_data(self): + self.assertEqual(b120.clean_td(' 8,000'), 8000) + self.assertEqual(b120.clean_td(' 5000 cfs'), '5000 cfs') + self.assertIsNone(b120.clean_td('')) + + def test_get_b120_update_data(self): """ - test tabular data extraction for the Water Quality Summary report using get_oco_tabular_data + test for B120 data-retrieval function relying on https://cdec.water.ca.gov/b120up.html """ - result = swp.get_oco_tabular_data('Water Quality Summary (daily)') - self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf') - self.assertIsInstance(result['info']['pages'], int) - self.assertIsInstance(result['data'], pd.DataFrame) - self.assertEqual(result['data'].shape, (30, 46)) - self.assertEqual(result['data'].index.name, 'Date (30 days)') - - -class TestWSI(unittest.TestCase): - pass + result = b120.get_b120_update_data(date_suffix='') + self.assertEqual(result['info']['title'], 'B-120 Water Supply Forecast Update Summary (posted on 06/10/20 13:44)') + self.assertEqual(result['info']['url'], 'https://cdec.water.ca.gov/b120up.html') + self.assertEqual(result['info']['type'], 'B120 Update') + self.assertEqual(result['data'].shape, (42, 9)) + + def test_get_120_archived_reports(self): + result = b120.get_120_archived_reports(2011, 4) + self.assertEqual(result['info']['title'], '.T WRB120.201104 1104081414/') + self.assertEqual(result['info']['url'], 'https://cdec.water.ca.gov/reportapp/javareports?name=B120.201104') + self.assertEqual(result['info']['type'], 'B120 Forecast') + self.assertEqual(result['info']['units'], 'TAF') + + self.assertEqual(result['data']['Apr-Jul'].shape, (26, 6)) + 
self.assertEqual(result['data']['Apr-Jul'].columns.tolist(), ['Hydrologic Region', + 'Watershed', + 'Apr-Jul Forecast', + '% of Avg', + '90% Exceedance', + '10% Exceedance']) + self.assertEqual(result['data']['WY'].shape, (16, 14)) + self.assertTrue('90% Exceedance' in result['data']['WY'].columns) + + def test_april_july_dataframe(self): + data_list = [['SACRAMENTO RIVER', 'Sacramento River above Shasta Lake', 120.0, '41%', None, None], + ['SACRAMENTO RIVER', 'McCloud River above Shasta Lake', 260.0, '68%', None, None], + ['SACRAMENTO RIVER', 'Pit River above Shasta Lake', 680.0, '67%', None, None], + ['SACRAMENTO RIVER', 'Total Inflow to Shasta Lake', 1050.0, '60%', 860.0, 1210.0], + ['SACRAMENTO RIVER', 'Sacramento River above Bend Bridge', 1480.0, '61%', 1230.0, 1750.0], + ['SACRAMENTO RIVER', 'Feather River at Oroville', 940.0, '55%', 780.0, 1080.0], + ['SACRAMENTO RIVER', 'Yuba River near Smartsville', 600.0, '62%', 480.0, 710.0], + ['SACRAMENTO RIVER', 'American River below Folsom Lake', 790.0, '66%', 650.0, 950.0]] + result = b120.april_july_dataframe(data_list) + self.assertIsInstance(result, pd.DataFrame) + self.assertEqual(result.shape, (8, 6)) + self.assertEqual(result.columns.tolist(), ['Hydrologic Region', 'Watershed', 'Apr-Jul Forecast', + '% of Avg', '90% Exceedance', '10% Exceedance']) + +# def deferred_test_get_station_url(self): +# result = cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') +# print(result) + +# def deferred_test_get_station_sensors(self): +# result = cdec.get_station_sensors(station, start, end) +# print(result) + +# def deferred_test_get_station_data(self): +# result = cdec.get_station_data(station, start, end, sensors=[], duration='') +# print(result) + +# def deferred_test_get_raw_station_csv(self): +# result = cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') +# print(result) + +# def deferred_test_get_raw_station_json(self): +# result = cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') +# print(result) + +# def deferred_test_get_sensor_frame(self): +# result = cdec.get_sensor_frame(station, start, end, sensor='', duration='') +# print(result) + +# def deferred_test_get_station_metadata(self): +# result = cdec.get_station_metadata(station, as_geojson=False) +# print(result) + +# def deferred_test_get_dam_metadata(self): +# result = cdec.get_dam_metadata(station) +# print(result) + +# def deferred_test_get_reservoir_metadata(self): +# result = cdec.get_reservoir_metadata(station) +# print(result) + +# def deferred_test__get_table_index(self): +# result = cdec._get_table_index(table_type, tables) +# print(result) + +# def deferred_test__parse_station_generic_table(self): +# result = cdec._parse_station_generic_table(table) +# print(result) + +# def deferred_test__parse_station_sensors_table(self): +# cdec._parse_station_sensors_table(table) + +# def deferred_test__parse_station_comments_table(self): +# cdec._parse_station_comments_table(table) + +# def deferred_test__parse_data_available(self): +# cdec._parse_data_available(text) + +# def deferred_test_get_data(self): +# cdec.get_data(station, start, end, sensor='', duration='') + +# def deferred_test_get_daily_snowpack_data(self): +# cdec.get_daily_snowpack_data(region, start, end) + + +# class TestSWP(unittest.TestCase): + + # def deferred_test_prompt_installation_and_exit(self): + # """ + # test to ensure appropriate warning is printed when pdftotext is not installed; not yet implemented + # """ 
+ # swp.prompt_installation_and_exit() + + # def test_get_report_catalog(self): + # """ + # test the default message behavior for get_report_catalog + # """ + # result = swp.get_report_catalog(console=False) + # self.assertTrue('Oroville Operations' in result) + # self.assertTrue('Weekly Summaries' in result) + + # def test_get_report_url(self): + # """ + # verify get_report_url produces the expected URL formats + # """ + # # check one of the reservoir PDF reports + # expected_url = '/'.join(['https://water.ca.gov/-/media', + # 'DWR-Website', + # 'Web-Pages', + # 'Programs', + # 'State-Water-Project', + # 'Operations-And-Maintenance', + # 'Files', + # 'Operations-Control-Office', + # 'Project-Wide-Operations', + # 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) + # self.assertEqual(swp.get_report_url('Oroville'), expected_url) + + # # check one of the txt-formatted reports + # expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', + # '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', + # 'resource', + # '45c01d10-4da2-4ebb-8927-367b3bb1e601', + # 'download', + # 'dispatchers-monday-water-report.txt']) + # self.assertEqual(swp.get_report_url('Mon'), expected_url) + + # # check for invalid input + # self.assertIsNone(swp.get_report_url('invalid')) + + # def test_get_raw_text(self): + # """ + # test expected behavior for get_raw_text for pdf report and invalid text report + # """ + # # test for a PDF-formatted report + # result = swp.get_raw_text('Delta Operations Summary (daily)') + # self.assertIsInstance(result, str) + # self.assertTrue(result.startswith('PRELIMINARY DATA')) + # self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov')) + + # # test for a text-formatted report + # self.assertRaises(ValueError, swp.get_raw_text, 'Mon') + + # def test_get_delta_daily_data(self): + # result = swp.get_delta_daily_data('dict') + # self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON ')) + # self.assertIsInstance(result['data'], dict) + # self.assertTrue('Reservoir Releases' in result['data']) + + # def test_get_barker_slough_data(self): + # result = swp.get_barker_slough_data() + # self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT') + # self.assertEqual(result['data'].shape, (7, 3)) + # self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex) + + # def test_get_oco_tabular_data(self): + # """ + # test tabular data extraction for the Water Quality Summary report using get_oco_tabular_data + # """ + # result = swp.get_oco_tabular_data('Water Quality Summary (daily)') + # self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf') + # self.assertIsInstance(result['info']['pages'], int) + # self.assertIsInstance(result['data'], pd.DataFrame) + # self.assertEqual(result['data'].shape, (30, 46)) + # self.assertEqual(result['data'].index.name, 'Date (30 days)') + + +# class TestWSI(unittest.TestCase): +# pass if __name__ == '__main__': From 9d6ab626725d1e7bd3bba89e556428f7f9d93949 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 29 Nov 2023 18:59:01 -0800 Subject: [PATCH 21/36] Add DWR CDEC initial tests; minor changes to argument default types for improved clarity on expected sensor values --- collect/dwr/cdec/queries.py | 30 +-- collect/tests/test_dwr.py | 471 +++++++++++++++++++++++++----------- 2 files changed, 332 insertions(+), 169 deletions(-) diff --git a/collect/dwr/cdec/queries.py b/collect/dwr/cdec/queries.py index 7728b7f..52abe60 
100644 --- a/collect/dwr/cdec/queries.py +++ b/collect/dwr/cdec/queries.py @@ -89,26 +89,6 @@ def get_station_url(station, start, end, data_format='CSV', sensors=[], duration return url -def get_station_sensors(station, start, end): - """ - Returns a `dict` of the available sensors for `station` for each duration in window - defined by `start` and `end`. - - Arguments: - station (str): the 3-letter CDEC station ID - start (dt.datetime): query start date - end (dt.datetime): query end date - Returns: - sensors (list): the available sensors for the station in this window - """ - sensors = {} - for duration in ['E', 'H', 'D', 'M']: - url = get_station_url(station, start, end, duration=duration) - df = pd.read_csv(url, header=0, na_values=['m', '---', ' ', 'ART', 'BRT', -9999, -9998, -9997], usecols=[0, 1, 2, 3]) - sensors.update({duration: list(df['SENSOR_TYPE'].unique())}) - return sensors - - def get_station_data(station, start, end, sensors=[], duration='', filename=None): """ General purpose function for returning a pandas DataFrame for all available @@ -207,7 +187,7 @@ def get_raw_station_json(station, start, end, sensors=[], duration='', filename= return result -def get_sensor_frame(station, start, end, sensor='', duration=''): +def get_sensor_frame(station, start, end, sensor=None, duration=''): """ return a pandas DataFrame of `station` data for a particular sensor, filtered by `duration` and `start` and `end` dates. @@ -216,7 +196,7 @@ def get_sensor_frame(station, start, end, sensor='', duration=''): station (str): the 3-letter CDEC station ID start (dt.datetime): query start date end (dt.datetime): query end date - sensor (str): the numeric sensor code + sensor (int): the numeric sensor code duration (str): interval code for timeseries data (ex: 'H') Returns: df (pandas.DataFrame): the queried timeseries for a single sensor as a DataFrame @@ -271,10 +251,10 @@ def get_station_metadata(station, as_geojson=False): # add site url site_info.update({'CDEC URL': f"{station}"}) - if soup.find('a', href=True, text='Dam Information'): + if soup.find('a', href=True, string='Dam Information'): site_info.update(get_dam_metadata(station)) - if soup.find('a', href=True, text='Reservoir Information'): + if soup.find('a', href=True, string='Reservoir Information'): site_info.update(get_reservoir_metadata(station)) # export a geojson feature (as dictionary) @@ -447,7 +427,7 @@ def _parse_data_available(text): return list(range(start.year, end.year + 1)) -def get_data(station, start, end, sensor='', duration=''): +def get_data(station, start, end, sensor=None, duration=''): """ return station date for a query bounded by start and end datetimes for a particular sensor/duration combination diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index ec765ad..e94cec2 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -11,6 +11,7 @@ import unittest import unittest.mock +from bs4 import BeautifulSoup import pandas as pd from collect.dwr import cdec @@ -148,150 +149,332 @@ def test_april_july_dataframe(self): self.assertEqual(result.columns.tolist(), ['Hydrologic Region', 'Watershed', 'Apr-Jul Forecast', '% of Avg', '90% Exceedance', '10% Exceedance']) -# def deferred_test_get_station_url(self): -# result = cdec.get_station_url(station, start, end, data_format='CSV', sensors=[], duration='') -# print(result) - -# def deferred_test_get_station_sensors(self): -# result = cdec.get_station_sensors(station, start, end) -# print(result) - -# def 
deferred_test_get_station_data(self): -# result = cdec.get_station_data(station, start, end, sensors=[], duration='') -# print(result) - -# def deferred_test_get_raw_station_csv(self): -# result = cdec.get_raw_station_csv(station, start, end, sensors=[], duration='', filename='') -# print(result) - -# def deferred_test_get_raw_station_json(self): -# result = cdec.get_raw_station_json(station, start, end, sensors=[], duration='', filename='') -# print(result) - -# def deferred_test_get_sensor_frame(self): -# result = cdec.get_sensor_frame(station, start, end, sensor='', duration='') -# print(result) - -# def deferred_test_get_station_metadata(self): -# result = cdec.get_station_metadata(station, as_geojson=False) -# print(result) - -# def deferred_test_get_dam_metadata(self): -# result = cdec.get_dam_metadata(station) -# print(result) - -# def deferred_test_get_reservoir_metadata(self): -# result = cdec.get_reservoir_metadata(station) -# print(result) - -# def deferred_test__get_table_index(self): -# result = cdec._get_table_index(table_type, tables) -# print(result) - -# def deferred_test__parse_station_generic_table(self): -# result = cdec._parse_station_generic_table(table) -# print(result) - -# def deferred_test__parse_station_sensors_table(self): -# cdec._parse_station_sensors_table(table) - -# def deferred_test__parse_station_comments_table(self): -# cdec._parse_station_comments_table(table) - -# def deferred_test__parse_data_available(self): -# cdec._parse_data_available(text) - -# def deferred_test_get_data(self): -# cdec.get_data(station, start, end, sensor='', duration='') - -# def deferred_test_get_daily_snowpack_data(self): -# cdec.get_daily_snowpack_data(region, start, end) - - -# class TestSWP(unittest.TestCase): - - # def deferred_test_prompt_installation_and_exit(self): - # """ - # test to ensure appropriate warning is printed when pdftotext is not installed; not yet implemented - # """ - # swp.prompt_installation_and_exit() - - # def test_get_report_catalog(self): - # """ - # test the default message behavior for get_report_catalog - # """ - # result = swp.get_report_catalog(console=False) - # self.assertTrue('Oroville Operations' in result) - # self.assertTrue('Weekly Summaries' in result) - - # def test_get_report_url(self): - # """ - # verify get_report_url produces the expected URL formats - # """ - # # check one of the reservoir PDF reports - # expected_url = '/'.join(['https://water.ca.gov/-/media', - # 'DWR-Website', - # 'Web-Pages', - # 'Programs', - # 'State-Water-Project', - # 'Operations-And-Maintenance', - # 'Files', - # 'Operations-Control-Office', - # 'Project-Wide-Operations', - # 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) - # self.assertEqual(swp.get_report_url('Oroville'), expected_url) - - # # check one of the txt-formatted reports - # expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', - # '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', - # 'resource', - # '45c01d10-4da2-4ebb-8927-367b3bb1e601', - # 'download', - # 'dispatchers-monday-water-report.txt']) - # self.assertEqual(swp.get_report_url('Mon'), expected_url) - - # # check for invalid input - # self.assertIsNone(swp.get_report_url('invalid')) - - # def test_get_raw_text(self): - # """ - # test expected behavior for get_raw_text for pdf report and invalid text report - # """ - # # test for a PDF-formatted report - # result = swp.get_raw_text('Delta Operations Summary (daily)') - # self.assertIsInstance(result, str) - # self.assertTrue(result.startswith('PRELIMINARY DATA')) - # 
self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov'))
-
-    #     # test for a text-formatted report
-    #     self.assertRaises(ValueError, swp.get_raw_text, 'Mon')
-
-    # def test_get_delta_daily_data(self):
-    #     result = swp.get_delta_daily_data('dict')
-    #     self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON '))
-    #     self.assertIsInstance(result['data'], dict)
-    #     self.assertTrue('Reservoir Releases' in result['data'])
-
-    # def test_get_barker_slough_data(self):
-    #     result = swp.get_barker_slough_data()
-    #     self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT')
-    #     self.assertEqual(result['data'].shape, (7, 3))
-    #     self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex)
-
-    # def test_get_oco_tabular_data(self):
-    #     """
-    #     test tabular data extraction for the Water Quality Summary report using get_oco_tabular_data
-    #     """
-    #     result = swp.get_oco_tabular_data('Water Quality Summary (daily)')
-    #     self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf')
-    #     self.assertIsInstance(result['info']['pages'], int)
-    #     self.assertIsInstance(result['data'], pd.DataFrame)
-    #     self.assertEqual(result['data'].shape, (30, 46))
-    #     self.assertEqual(result['data'].index.name, 'Date (30 days)')
-
-
-# class TestWSI(unittest.TestCase):
-#     pass
+    def test_get_station_url(self):
+        """
+        test the creation of query URL for a particular station, sensor set, data interval, and date range
+        """
+        result = cdec.get_station_url('CFW',
+                                      dt.datetime(2023, 1, 1),
+                                      dt.datetime(2023, 1, 3),
+                                      data_format='CSV',
+                                      sensors=[6],
+                                      duration='H')
+        self.assertEqual(result, '&'.join(['https://cdec.water.ca.gov/dynamicapp/req/CSVDataServlet?Stations=CFW',
+                                           'dur_code=H',
+                                           'SensorNums=6',
+                                           'Start=2023-01-01',
+                                           'End=2023-01-03']))
+
+    def test_get_data(self):
+        """
+        test retrieval of station timeseries and details data
+        """
+        result = cdec.get_data('CFW', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 3), sensor=6, duration='D')
+        self.assertIsInstance(result, dict)
+        self.assertIsInstance(result['info'], dict)
+        self.assertEqual(result['info']['title'], 'BEAR RIVER AT CAMP FAR WEST DAM')
+        self.assertIsInstance(result['data'], pd.DataFrame)
+        self.assertEqual(result['data']['VALUE'].values.tolist(), [300.48, 300.98, 300.72])
+
+    def test_get_sensor_frame(self):
+        """
+        test timeseries data retrieval using the CSV query service for a particular date range and sensor combo
+        """
+        result = cdec.get_sensor_frame('CFW', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 3), sensor=15, duration='D')
+        self.assertIsInstance(result, pd.DataFrame)
+        self.assertEqual(result['VALUE'].values.tolist(), [105419.0, 106489.0, 105931.0])
+
+    def test_get_station_data(self):
+        """
+        test duplicate function (with get_raw_station_csv) for retrieval of timeseries data
+        """
+        result = cdec.get_station_data('CFW',
+                                       dt.datetime(2023, 1, 1),
+                                       dt.datetime(2023, 6, 3),
+                                       sensors=[15],
+                                       duration='M')
+        self.assertIsInstance(result, pd.DataFrame)
+        self.assertEqual(result.shape, (6, 9))
+        self.assertEqual(result.tail(1).values.tolist()[0][:6], ['CFW', 'M', 15, 'STORAGE', '20230601 0000', 73931.0])
+
+    def test_get_raw_station_csv(self):
+        """
+        test expected values for an hourly elevation data query
+        """
+        result = cdec.get_raw_station_csv('CFW',
+                                          dt.datetime(2023, 1, 1),
+                                          dt.datetime(2023, 1, 3),
+                                          sensors=[6],
+                                          duration='H',
+                                          filename='')
+        self.assertIsInstance(result, pd.DataFrame)
+        self.assertEqual(result.shape, (49, 9))
+        self.assertEqual(result.tail(1).values.tolist()[0][:6], ['CFW', 'H', 6, 'RES ELE', '20230103 0000', 300.98])
+
+    def test_get_raw_station_json(self):
+        """
+        test retrieval of timeseries station data using the JSON query service
+        """
+        result = cdec.get_raw_station_json('CFW',
+                                           dt.datetime(2023, 1, 1),
+                                           dt.datetime(2023, 1, 4),
+                                           sensors=[15],
+                                           duration='D',
+                                           filename='')
+        self.assertIsInstance(result, list)
+        self.assertEqual([(x['date'], x['value']) for x in result], [('2023-1-1 00:00', 105419),
+                                                                     ('2023-1-2 00:00', 106489),
+                                                                     ('2023-1-3 00:00', 105931),
+                                                                     ('2023-1-4 00:00', 105185)])
+
+    def test_get_station_metadata(self):
+        """
+        test for retrieving station information from the CDEC detail page
+        """
+        result = cdec.get_station_metadata('CFW', as_geojson=False)
+        self.assertIsInstance(result, dict)
+        self.assertIsInstance(result['info'], dict)
+        self.assertIsInstance(result['info']['dam'], dict)
+        self.assertIsInstance(result['info']['reservoir'], dict)
+        self.assertIsInstance(result['info']['sensors'], dict)
+        self.assertEqual(result['info']['title'], 'BEAR RIVER AT CAMP FAR WEST DAM')
+        self.assertEqual(result['info']['Station ID'], 'CFW')
+        self.assertEqual(result['info']['Latitude'], '39.049858°')
+
+    def test_get_dam_metadata(self):
+        """
+        test for retrieving dam information from the CDEC detail page
+        """
+        result = cdec.get_dam_metadata('CFW')
+        self.assertIsInstance(result, dict)
+        self.assertIsInstance(result['dam'], dict)
+        self.assertEqual(result['dam']['title'], 'Dam Information')
+        self.assertEqual(result['dam']['Station ID'], 'CFW')
+        self.assertEqual(result['dam']['Dam Name'], 'CAMP FAR WEST')
+        self.assertEqual(result['dam']['National ID'], 'CA00227')
+
+    def test_get_reservoir_metadata(self):
+        """
+        test for retrieving reservoir information from the CDEC detail page
+        """
+        result = cdec.get_reservoir_metadata('CFW')
+        self.assertIsInstance(result, dict)
+        self.assertIsInstance(result['reservoir'], dict)
+        self.assertEqual(result['reservoir']['title'], 'BEAR RIVER AT CAMP FAR WEST DAM (CFW)')
+        self.assertEqual(result['reservoir']['Station ID'], 'CFW')
+        self.assertEqual(result['reservoir']['Stream Name'], 'Bear River')
+        self.assertEqual(result['reservoir']['Capacity'], '104,500 af')
+
+    def test__get_table_index(self):
+        """
+        test function used to determine position of table in station detail page relative to other tables
+        """
+        self.assertEqual(cdec.queries._get_table_index('site', [1]), 0)
+        self.assertEqual(cdec.queries._get_table_index('datum', [1, 1, 1, 1]), 1)
+        self.assertIsNone(cdec.queries._get_table_index('datum', [1, 1, 1]))
+        self.assertEqual(cdec.queries._get_table_index('sensors', [1, 1, 1, 1]), 2)
+        self.assertEqual(cdec.queries._get_table_index('comments', [1, 1, 1, 1]), 3)
+        self.assertIsNone(cdec.queries._get_table_index('other', []))
+
+    def test__parse_station_generic_table(self):
+        """
+        test extraction of station general information and data availability from station detail page
+        """
+        table = BeautifulSoup(textwrap.dedent("""\
+            <table>
+                <tr><td>Station ID</td><td>CFW</td><td>Elevation</td><td>260 ft</td></tr>
+                <tr><td>River Basin</td><td>BEAR RIVER</td><td>County</td><td>YUBA</td></tr>
+                <tr><td>Hydrologic Area</td><td>SACRAMENTO RIVER</td><td>Nearby City</td><td>MARYSVILLE</td></tr>
+                <tr><td>Latitude</td><td>39.049858°</td><td>Longitude</td><td>-121.315941°</td></tr>
+                <tr><td>Operator</td><td>CA Dept of Water Resources/DFM-Hydro-SMN</td><td>Maintenance</td><td>CA Dept of Water Resources/DFM-Hydro-SMN</td></tr>
+            </table>
+            """))
+        result = cdec.queries._parse_station_generic_table(table)
+        self.assertEqual(result, {'Station ID': 'CFW',
+                                  'Elevation': '260 ft',
+                                  'River Basin': 'BEAR RIVER',
+                                  'County': 'YUBA',
+                                  'Hydrologic Area': 'SACRAMENTO RIVER',
+                                  'Nearby City': 'MARYSVILLE',
+                                  'Latitude': '39.049858°',
+                                  'Longitude': '-121.315941°',
+                                  'Operator': 'CA Dept of Water Resources/DFM-Hydro-SMN',
+                                  'Maintenance': 'CA Dept of Water Resources/DFM-Hydro-SMN'})
+
+    def test__parse_station_sensors_table(self):
+        """
+        test extraction of sensor information and data availability from station detail page
+        """
+        table = BeautifulSoup(textwrap.dedent("""\
+            <table>
+                <tr>
+                    <td>Sensor Description</td>
+                    <td>Sensor Number</td>
+                    <td>Duration</td>
+                    <td>Plot</td>
+                    <td>Data Collection</td>
+                    <td>Data Available</td>
+                </tr>
+                <tr>
+                    <td>FLOW, RIVER DISCHARGE, CFS</td>
+                    <td>20</td>
+                    <td>(event)</td>
+                    <td>(FLOW)</td>
+                    <td>COMPUTED</td>
+                    <td>01/01/2021 to 01/01/2023</td>
+                </tr>
+            </table>
+            """))
+        result = cdec.queries._parse_station_sensors_table(table)
+        self.assertEqual(result, {'20': {'event': {'description': 'FLOW, RIVER DISCHARGE, CFS',
+                                                   'sensor': '20',
+                                                   'duration': 'event',
+                                                   'collection': 'COMPUTED',
+                                                   'availability': '01/01/2021 to 01/01/2023',
+                                                   'years': [2021, 2022, 2023]}}})
+
+    def test__parse_station_comments_table(self):
+        table = BeautifulSoup(textwrap.dedent("""\
+            <table>
+                <tr><td>02/28/2023</td><td>Example comment about data availability.</td></tr>
+                <tr><td>04/27/2020</td><td>Example comment about datum info.</td></tr>
+            </table>
+ """)) + result = cdec.queries._parse_station_comments_table(table) + self.assertEqual(result, {'02/28/2023': 'Example comment about data availability.', + '04/27/2020': 'Example comment about datum info.'}) + + def test__parse_data_available(self): + """ + test generation of year list for the data availability from sensor table on station detail page + """ + result = cdec.queries._parse_data_available('01/01/2021 to 01/01/2023') + self.assertEqual(result, [2021, 2022, 2023]) + + def test_get_daily_snowpack_data(self): + """ + test for retrieving past daily snowpack data + """ + result = cdec.get_daily_snowpack_data('CENTRAL', dt.datetime(2023, 1, 1), dt.datetime(2023, 1, 3)) + self.assertEqual(result['info']['interval'], 'daily') + self.assertEqual(result['info']['region'], 'CENTRAL') + self.assertEqual(result['data'].shape, (3, 5)) + self.assertEqual(result['data'].tail(1).values.tolist(), [['CENTRAL', 53, 19.0, 70, 185]]) + + +class TestSWP(unittest.TestCase): + + def deferred_test_prompt_installation_and_exit(self): + """ + test to ensure appropriate warning is printed when pdftotext is not installed; not yet implemented + """ + swp.prompt_installation_and_exit() + + def test_get_report_catalog(self): + """ + test the default message behavior for get_report_catalog + """ + result = swp.get_report_catalog(console=False) + self.assertTrue('Oroville Operations' in result) + self.assertTrue('Weekly Summaries' in result) + + def test_get_report_url(self): + """ + verify get_report_url produces the expected URL formats + """ + # check one of the reservoir PDF reports + expected_url = '/'.join(['https://water.ca.gov/-/media', + 'DWR-Website', + 'Web-Pages', + 'Programs', + 'State-Water-Project', + 'Operations-And-Maintenance', + 'Files', + 'Operations-Control-Office', + 'Project-Wide-Operations', + 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) + self.assertEqual(swp.get_report_url('Oroville'), expected_url) + + # check one of the txt-formatted reports + expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', + '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', + 'resource', + '45c01d10-4da2-4ebb-8927-367b3bb1e601', + 'download', + 'dispatchers-monday-water-report.txt']) + self.assertEqual(swp.get_report_url('Mon'), expected_url) + + # check for invalid input + self.assertIsNone(swp.get_report_url('invalid')) + + def test_get_raw_text(self): + """ + test expected behavior for get_raw_text for pdf report and invalid text report + """ + # test for a PDF-formatted report + result = swp.get_raw_text('Delta Operations Summary (daily)') + self.assertIsInstance(result, str) + self.assertTrue(result.startswith('PRELIMINARY DATA')) + self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov')) + + # test for a text-formatted report + self.assertRaises(ValueError, swp.get_raw_text, 'Mon') + + def test_get_delta_daily_data(self): + result = swp.get_delta_daily_data('dict') + self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON ')) + self.assertIsInstance(result['data'], dict) + self.assertTrue('Reservoir Releases' in result['data']) + + def test_get_barker_slough_data(self): + result = swp.get_barker_slough_data() + self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT') + self.assertEqual(result['data'].shape, (7, 3)) + self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex) + + def test_get_oco_tabular_data(self): + """ + test tabular data extraction for the Water Quality Summary 
report using get_oco_tabular_data + """ + result = swp.get_oco_tabular_data('Water Quality Summary (daily)') + self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf') + self.assertIsInstance(result['info']['pages'], int) + self.assertIsInstance(result['data'], pd.DataFrame) + self.assertEqual(result['data'].shape, (30, 46)) + self.assertEqual(result['data'].index.name, 'Date (30 days)') + + +class TestWSI(unittest.TestCase): + pass if __name__ == '__main__': From 5b978121004e4c2cf0f37d1cba9876ce87441203 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Fri, 1 Nov 2024 18:29:27 -0700 Subject: [PATCH 22/36] Remove exit --- collect/dwr/swp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/collect/dwr/swp.py b/collect/dwr/swp.py index e38ed63..84d1e50 100644 --- a/collect/dwr/swp.py +++ b/collect/dwr/swp.py @@ -16,7 +16,6 @@ import pdftotext except: print('Module pdftotext is required for SWP report collection. Install with `pip install pdftotext==2.2.2`') - exit() def get_report_catalog(console=True): From 2faf676ae9d8906b2629b4a1a425b88a44585f0c Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 26 Nov 2024 10:18:07 -0800 Subject: [PATCH 23/36] Update NID, DWR tests; update installation prompt --- collect/cvo/cvo.py | 3 ++- collect/tests/test_dwr.py | 6 ------ collect/tests/test_nid.py | 21 ++++++++++++++------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/collect/cvo/cvo.py b/collect/cvo/cvo.py index 4372001..e55ff7f 100644 --- a/collect/cvo/cvo.py +++ b/collect/cvo/cvo.py @@ -11,10 +11,11 @@ import dateutil.parser import pandas as pd import requests + try: from tabula import read_pdf except: - print('Module tabula is required for CVO report collection') + print('Module tabula is required for CVO report collection. 
Install with `pip install tabula-py==2.4.0`') REPORTS = [ diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index e94cec2..f9ef4d6 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -393,12 +393,6 @@ def test_get_daily_snowpack_data(self): class TestSWP(unittest.TestCase): - def deferred_test_prompt_installation_and_exit(self): - """ - test to ensure appropriate warning is printed when pdftotext is not installed; not yet implemented - """ - swp.prompt_installation_and_exit() - def test_get_report_catalog(self): """ test the default message behavior for get_report_catalog diff --git a/collect/tests/test_nid.py b/collect/tests/test_nid.py index 0d195fb..3c4a704 100644 --- a/collect/tests/test_nid.py +++ b/collect/tests/test_nid.py @@ -146,18 +146,25 @@ def test_get_daily_meta(self): result = nid.get_daily_meta(url=url, content=None) self.assertEqual(result['Site'], 'DC140 Tunnel Canal at Head') self.assertEqual(result['USGS #'], 'NO') - self.assertEqual(result['version'], 'USDAY V123') + self.assertTrue(result['version'].startswith('USDAY V')) def test_get_hourly_data(self): + expected_year = dt.date.today().year - 1 result = nid.get_hourly_data('WLSN', json_compatible=False) sample = result['data'].head() self.assertEqual(sample.index.strftime('%Y-%m-%d %H:%M:%S').tolist(), - ['2022-01-01 01:00:00', - '2022-01-01 02:00:00', - '2022-01-01 03:00:00', - '2022-01-01 04:00:00', - '2022-01-01 05:00:00']) - self.assertEqual(sample['Amount Diverted (AF)'].tolist(), [0.15, 0.15, 0.15, 0.15, 0.15]) + [f'{expected_year}-01-01 01:00:00', + f'{expected_year}-01-01 02:00:00', + f'{expected_year}-01-01 03:00:00', + f'{expected_year}-01-01 04:00:00', + f'{expected_year}-01-01 05:00:00']) + self.assertEqual(sample.columns.tolist(), + ['Date', + 'Time', + 'Measured Value', + 'Units', + 'Amount Diverted (AF)', + 'Quality']) def test_parse_qualifiers(self): series = pd.Series(data=['Qualities:', From 03dac34517ce49b27b726d436e945107a6d0d7cc Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Tue, 26 Nov 2024 10:55:40 -0800 Subject: [PATCH 24/36] Alert, cnrfc tests updates --- collect/tests/test_alert.py | 5 ++++- collect/tests/test_cnrfc.py | 29 +++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/collect/tests/test_alert.py b/collect/tests/test_alert.py index 9c1d426..51276b9 100644 --- a/collect/tests/test_alert.py +++ b/collect/tests/test_alert.py @@ -51,7 +51,10 @@ def test_get_sites(self): self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (37, 10)) def test_get_sites_from_list(self): - self.assertEqual(alert.get_sites_from_list(as_dataframe=True, sensor_class=None).shape, (128, 4)) + """ + test the expected number of sites registered on the Sac Alert websites + """ + self.assertEqual(alert.get_sites_from_list(as_dataframe=True, sensor_class=None).shape, (127, 4)) def test_ustrip(self): self.assertEqual(alert.alert._ustrip('\u00A0'), '') diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index 6f9b448..56d6ac8 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -7,8 +7,10 @@ import datetime as dt import io import os +import re import textwrap import unittest +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from dotenv import load_dotenv @@ -179,7 +181,8 @@ def test_get_ensemble_forecast(self): test for current ensemble forecast file schema, using Vernalis forecast location """ result = cnrfc.get_ensemble_forecast('VNSC1', 'hourly', 
acre_feet=False, pdt_convert=False, as_pdt=False) - self.assertEqual(result['data'].shape, (721, 43)) + self.assertEqual(result['data'].shape[0], 721) + self.assertTrue(result['data'].shape[1] > 40) self.assertIsNone(result['data'].index.tzinfo) self.assertEqual(result['info']['watershed'], 'SanJoaquin') self.assertEqual(result['info']['units'], 'cfs') @@ -237,7 +240,7 @@ def test_get_rating_curve(self): example expected output from get_rating_curve method """ result = cnrfc.get_rating_curve('DCSC1') - self.assertEqual(result['data'][0], (0.92, 0.45)) + self.assertEqual(result['data'][0], (1.07, 0.45)) self.assertEqual(result['data'][-1], (15.0, 16300.0)) self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/data/ratings/DCSC1_rating.js') @@ -323,7 +326,8 @@ def test_get_ensemble_product_10(self): self.assertEqual(result['data'].shape, (5, 12)) self.assertEqual(result['data'].index.tolist(), ['10%', '25%', '50%(Median)', '75%', '90%']) self.assertEqual(result['info']['url'], 'https://www.cnrfc.noaa.gov/ensembleProduct.php?id=SHDC1&prodID=10') - self.assertEqual(result['info']['type'], 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation') + self.assertEqual(result['info']['type'], + 'Water Year Accumulated Volume Plot & Tabular Monthly Volume Accumulation') self.assertEqual(result['info']['units'], 'TAF') def test__parse_blue_table(self): @@ -458,7 +462,7 @@ def test_get_ensemble_first_forecast_ordinate(self): ) self.assertIsInstance(result, dt.datetime) result_utc = utils.get_localized_datetime(result, 'UTC') - self.assertLess(result_utc, utils.get_localized_datetime(dt.datetime.now(), 'UTC')) + self.assertLess(result_utc, dt.datetime.now().astimezone(ZoneInfo('UTC'))) def test__get_forecast_csv(self): """ @@ -473,8 +477,9 @@ def test__get_forecast_csv(self): # check second line contains variables identifiers self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - # check third line contains expected timeseries info - self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + # check third line starts with date/time of proper format and contains expected timeseries info + pattern = r'^(\d{4}-\d{2}-\d{2} \d{2}:00:00),((\d+.\d+,)+)' + self.assertTrue(len(re.match(pattern, result.readline().decode('utf-8')).groups()) > 2) def test_get_forecast_csvdata(self): """ @@ -485,7 +490,10 @@ def test_get_forecast_csvdata(self): self.assertIsInstance(result, io.BytesIO) self.assertTrue(result.readline().decode('utf-8').startswith('GMT,HLEC1')) self.assertTrue(result.readline().decode('utf-8').startswith(',QINE,QINE')) - self.assertTrue(result.readline().decode('utf-8').startswith('2023-11-29 12:00:00,0.89311')) + + # check third line starts with date/time of proper format and contains expected timeseries info + pattern = r'^(\d{4}-\d{2}-\d{2} \d{2}:00:00),((\d+.\d+,)+)' + self.assertTrue(len(re.match(pattern, result.readline().decode('utf-8')).groups()) > 2) def test__get_cnrfc_restricted_content(self): """ @@ -502,8 +510,13 @@ def test_download_watershed_file(self): """ test for downloading watershed file to local file system (in this case, downloaded to in-memory object) """ - result = cnrfc.download_watershed_file('WSI', '2023010112', 'ensemble', duration='daily', path=io.BytesIO()) + result, filename = cnrfc.download_watershed_file('WSI', + '2023010112', + 'ensemble', + duration='daily', + return_content=True) self.assertIsInstance(result, io.BytesIO) + self.assertIsInstance(filename, str) # check first 
line contains forecast point headers
        self.assertTrue(result.readline().decode('utf-8').startswith('GMT,SACC0'))

From d328cc66cea5478c1d49317ed59e771dea7d3d69 Mon Sep 17 00:00:00 2001
From: Carly Narlesky <narlesky@mbkengineers.com>
Date: Tue, 26 Nov 2024 11:36:53 -0800
Subject: [PATCH 25/36] Update installation instructions

---
 README.md      |  4 +--
 pyproject.toml | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/README.md b/README.md
index 3315cea..4f9315e 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 Web scraping utilties for DWR, USACE, USGS, CNRFC, CVO and SacALERT data repositories.
 
 ## Setup instructions
-### Create a virtual environment, specifying Python version 3.8
+### Create a virtual environment, specifying Python version 3.11
 #### With pure Python (3+)
 Create a virtual environment with Python 3's built-in `venv` library.
@@ -24,7 +24,7 @@ or ```$ source myenv/bin/activate``` (MacOS).
 Use the "editable" flag (-e) flag to make sure changes in your repo are propagated to any use of your virtualenv.
 ```
 $ cd collect
-$ python -m pip install -e .
+$ python -m pip install --use-pep517 -e .
 ```
 
 ### Install Java
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..af51ec5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,62 @@
+[build-system]
+requires = ["setuptools >= 61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+dynamic = ["version"]
+name = "collect"
+description = "Contains various web-scraping utilities used at MBK Engineers"
+readme = "README.md"
+requires-python = ">= 3.11"
+dependencies = [
+    "numpy>=1.21.3",
+    "beautifulsoup4==4.12.2",
+    "html5lib==1.1",
+    "lxml==4.9.3",
+    "pandas==1.5.3",
+    "python-dateutil>=2.8.2",
+    "python-dotenv==0.19.2",
+    "requests>=2.26.0",
+    "scipy>=1.8.0",
+    "selenium==4.15.2",
+    "tabula-py==2.4.0"
+]
+authors = [
+    {name = "Carly Narlesky", email = "narlesky@mbkengineers.com"},
+    {name = "Olivia Alexander", email = "alexander@mbkengineers.com"},
+    {name = "Sophie Danielsen", email = "danielsen@mbkengineers.com"},
+    {name = "Carissa Abraham", email = "abraham@mbkengineers.com"},
+    {name = "Jeremy Inducil", email = "inducil@mbkengineers.com"},
+    {name = "Martin Liu", email = "liu@mbkengineers.com"},
+    {name = "Gerardo Veliz Carrillo", email = "carrillo@mbkengineers.com"},
+    {name = "Catherine Morales-Sandoval", email = "morales-sandoval@mbkengineers.com"},
+]
+maintainers = [
+    {name = "Carly Narlesky", email = "narlesky@mbkengineers.com"}
+]
+keywords = []
+
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Operating System :: OS Independent",
+    "Topic :: Software Development :: Libraries :: Python Modules"
+]
+
+[project.optional-dependencies]
+swp = ["pdftotext==2.2.2"]
+docs = [
+    "Sphinx==4.3.0",
+    "sphinx-readable-theme==1.3.0",
+    "sphinx-rtd-theme==1.0.0"
+]
+
+[project.scripts]
+collect-start = "collect.bin.collect-start"
+
+[project.urls]
+Homepage = "https://github.com/MBKEngineers/collect"
+Documentation = "https://github.com/MBKEngineers/collect/docs"
+Repository = "https://github.com/MBKEngineers/collect.git"
+Issues = "https://github.com/MBKEngineers/collect/issues"
+

From a48555a897454b73366e950fe1e29ff542f590ae Mon Sep 17 00:00:00 2001
From: Carly Narlesky <narlesky@mbkengineers.com>
Date: Tue, 26 Nov 2024 12:34:29
-0800 Subject: [PATCH 26/36] Expand use of utils.get_session_response; update tests; n-dash to m-dash conversion in SWP script --- collect/dwr/swp.py | 15 +++++++++++++-- collect/tests/test_alert.py | 2 +- collect/usace/wcds.py | 16 ++-------------- collect/usgs/usgs.py | 26 +++++++++++++++++++------- collect/utils/utils.py | 13 ++++++++++--- 5 files changed, 45 insertions(+), 27 deletions(-) diff --git a/collect/dwr/swp.py b/collect/dwr/swp.py index 84d1e50..5508e88 100644 --- a/collect/dwr/swp.py +++ b/collect/dwr/swp.py @@ -159,6 +159,9 @@ def get_delta_daily_data(export_as='dict'): """ content = get_raw_text('Delta Operations Summary (daily)', 'raw_export.txt') + with open ('helper.txt', 'w') as f: + f.write(content) + # extract current report's date rx = re.compile(r'(?P\d{1,2}/\d{1,2}/\d{4})') date = rx.search(content).group('date') @@ -182,6 +185,10 @@ def get_delta_daily_data(export_as='dict'): def _parse_entry(match): value = match.group('value') + # clean up dash to be parsed as minus sign for numeric entries + if re.match(r'^(‐\d+)', value) is not None: + value = value.replace('‐', '-') + units_match = re.findall(r'(cfs|TAF|km|%|% \(14-day avg\))$', value) units = units_match[0] if bool(units_match) else '' @@ -240,10 +247,14 @@ def _parse_entry(match): for x, v in result.items(): if isinstance(v, dict): for k in v.keys(): - v.update({k: extract[k]}) + + # cleaning for n-dash vs m-dash in source content + element = extract.get(k, extract.get(k.replace('-', '‐'))) + + v.update({k: element}) # update frame - df[x, k, extract[k]['units']] = extract[k]['value'] + df[x, k, element['units']] = element['value'] df.columns = pd.MultiIndex.from_tuples(df.columns) diff --git a/collect/tests/test_alert.py b/collect/tests/test_alert.py index 51276b9..2c1883b 100644 --- a/collect/tests/test_alert.py +++ b/collect/tests/test_alert.py @@ -47,7 +47,7 @@ def test_get_sites(self): """ test the function for retrieving site list for a particular gage types returns the expected number of entries """ - self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (81, 12)) + self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (80, 12)) self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (37, 10)) def test_get_sites_from_list(self): diff --git a/collect/usace/wcds.py b/collect/usace/wcds.py index 05efad5..23883cc 100644 --- a/collect/usace/wcds.py +++ b/collect/usace/wcds.py @@ -10,13 +10,6 @@ import textwrap import pandas as pd -import urllib3.contrib.pyopenssl -urllib3.contrib.pyopenssl.inject_into_urllib3() - -import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry -import ssl from collect import utils @@ -40,7 +33,7 @@ def get_water_year_data(reservoir, water_year, interval='d'): url = f'https://www.spk-wc.usace.army.mil/plots/csv/{reservoir}{interval}_{water_year}.plot' # Read url data - response = requests.get(url, verify=ssl.CERT_NONE).content + response = utils.get_session_response(url).content df = pd.read_csv(io.StringIO(response.decode('utf-8')), header=0, na_values=['-', 'M']) # Check that user chosen water year is within range with data @@ -286,12 +279,7 @@ def get_reservoir_metadata(reservoir, water_year, interval='d'): url = f'https://www.spk-wc.usace.army.mil/plots/csv/{reservoir}{interval}_{water_year}.meta' # read data from url using requests session with retries - session = requests.Session() - retries = Retry(total=5, - backoff_factor=0.1, - 
status_forcelist=[500, 502, 503, 504]) - session.mount('https://', HTTPAdapter(max_retries=retries)) - response = session.get(url, verify=ssl.CERT_NONE) + response = utils.get_session_response(url) # complete metadata dictionary metadata_dict = response.json() diff --git a/collect/usgs/usgs.py b/collect/usgs/usgs.py index ed4e8bd..3f23312 100644 --- a/collect/usgs/usgs.py +++ b/collect/usgs/usgs.py @@ -6,9 +6,10 @@ # -*- coding: utf-8 -*- import datetime as dt from bs4 import BeautifulSoup -import dateutil.parser +# import dateutil.parser import pandas as pd -import requests + +from collect import utils def get_query_url(station_id, sensor, start_time, end_time, interval): @@ -34,7 +35,7 @@ def get_query_url(station_id, sensor, start_time, end_time, interval): format_start = start_time.strftime('%Y-%m-%d') format_end = end_time.strftime('%Y-%m-%d') - # construct query URL + # construct query URL url = '&'.join([f'https://waterservices.usgs.gov/nwis/{interval_code}v/?format=json', f'sites={station_id}', f'startDT={format_start}', @@ -56,7 +57,7 @@ def get_data(station_id, sensor, start_time, end_time, interval='instantaneous') 80155 Suspnd sedmnt disch(Mean) Arguments: - station_id (int or str): the USGS station code (ex: 11446220) + station_id (int or str): the USGS station code (ex: 11418500) sensor (str): ex '00060' (discharge) start_time (dt.datetime): ex dt.datetime(2016, 10, 1) end_time (dt.datetime): ex dt.datetime(2017, 10, 1) @@ -71,8 +72,19 @@ def get_data(station_id, sensor, start_time, end_time, interval='instantaneous') url = get_query_url(station_id, sensor, start_time, end_time, interval) # get gage data as json - data = requests.get(url, verify=False).json() - + data = utils.get_session_response(url).json() + + # if no timeseries data is available, return empty payload with only request parameters + if len(data['value']['timeSeries']) == 0: + return { + 'data': None, + 'info': { + 'site number': station_id, + 'sensor': sensor, + 'interval': interval + } + } + # process timeseries info series = data['value']['timeSeries'][0]['values'][0]['value'] for entry in series: @@ -136,7 +148,7 @@ def leap_filter(x): frame.index = pd.to_datetime(frame['peak_dt'].apply(leap_filter)) # load USGS site information - result = BeautifulSoup(requests.get(url.rstrip('rdb')).content, 'lxml') + result = BeautifulSoup(utils.get_session_response(url.rstrip('rdb')).content, 'lxml') info = {'site number': station_id, 'site name': result.find('h2').text} meta = result.findAll('div', {'class': 'leftsidetext'})[0] for div in meta.findChildren('div', {'align': 'left'}): diff --git a/collect/utils/utils.py b/collect/utils/utils.py index dae90ab..0281855 100644 --- a/collect/utils/utils.py +++ b/collect/utils/utils.py @@ -11,7 +11,7 @@ import urllib3.contrib.pyopenssl urllib3.contrib.pyopenssl.inject_into_urllib3() -import ssl +# import ssl import requests from requests.packages.urllib3.util.retry import Retry @@ -27,19 +27,26 @@ tz_function = timezone -def get_session_response(url, auth=None): +def get_session_response(url, auth=None, verify=None): """ + wraps request with a session and 5 retries; provides optional auth and verify parameters + Arguments: url (str): valid web URL + auth (requests.auth.HTTPBasicAuth): username/password verification for authenticating request + verify (bool or ssl.CERT_NONE): if provided, this verify parameter is passed to session.get Returns: (requests.models.Response): the response object with site content specified by URL """ + verify_kwarg = {} if verify is None else 
{'verify': verify} + + # initialize connection session = requests.Session() retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) session.mount('https://', HTTPAdapter(max_retries=retries)) - return session.get(url, auth=auth, verify=ssl.CERT_NONE) + return session.get(url, auth=auth, **verify_kwarg) def get_web_status(url): From 6fb98a282f9bcaf20391bcc03bfbbbc31f8dc992 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 10:47:42 -0800 Subject: [PATCH 27/36] Update USACE tests for compat with implemented trimming of get_wcds_data to start and end times --- collect/tests/test_usace.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collect/tests/test_usace.py b/collect/tests/test_usace.py index aa121ae..07ccbfb 100644 --- a/collect/tests/test_usace.py +++ b/collect/tests/test_usace.py @@ -35,8 +35,8 @@ def test_get_water_year_data(self): def test_get_data(self): result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') - self.assertEqual(result['data'].shape, (398, 16)) - self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0]) + self.assertEqual(result['data'].shape, (18, 16)) + self.assertEqual(result['data']['Storage'].tolist()[:4], [2235532.0, 2308907.0, 2357517.0, 2392661.0]) def test_get_wcds_reservoirs(self): """ @@ -46,8 +46,8 @@ def test_get_wcds_reservoirs(self): def test_get_wcds_data(self): result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') - self.assertEqual(result['data'].shape, (398, 16)) - self.assertEqual(result['data']['Storage'].tolist()[:4], [1592122.0, 1590203.0, 1585627.0, 1582232.0]) + self.assertEqual(result['data'].shape, (18, 16)) + self.assertEqual(result['data']['Storage'].tolist()[:4], [2235532.0, 2308907.0, 2357517.0, 2392661.0]) def test_get_release_report(self): self.assertEqual(wcds.get_release_report('buc')['info']['units'], 'cfs') From 45cb26d4001b91cb14136a701d3a5f0d54312c84 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 11:03:59 -0800 Subject: [PATCH 28/36] Rollback utils.get_session_response implementation for memory allocation warnings in test suite; update assertion in alert test --- collect/alert/alert.py | 14 ++++++++------ collect/dwr/b120.py | 25 ++++++++++++------------- collect/dwr/swp.py | 7 +++---- collect/nid/nid.py | 11 ++++++----- collect/tests/test_alert.py | 2 +- collect/usace/wcds.py | 9 ++++++--- collect/usgs/usgs.py | 7 +++---- 7 files changed, 39 insertions(+), 36 deletions(-) diff --git a/collect/alert/alert.py b/collect/alert/alert.py index c7dfc48..0d383ff 100644 --- a/collect/alert/alert.py +++ b/collect/alert/alert.py @@ -10,6 +10,8 @@ from bs4 import BeautifulSoup, SoupStrainer import pandas as pd +import requests + from collect import utils @@ -64,7 +66,7 @@ def get_sites(as_dataframe=True, datatype='stream'): group_type_id = {'rain': 14, 'stream': 19, 'temperature': 30}.get(datatype) url = f'https://www.sacflood.org/{measure}?&view_id=1&group_type_id={group_type_id}' - soup = BeautifulSoup(utils.get_session_response(url).text, 'lxml') + soup = BeautifulSoup(requests.get(url).text, 'lxml') df = pd.read_html(str(soup.find('table')))[0] # strip whitespace from columns @@ -87,7 +89,7 @@ def get_sites_from_list(as_dataframe=True, sensor_class=None): url = 'https://www.sacflood.org/list/' if sensor_class: url += '?&sensor_class={}'.format(sensor_class) - soup = 
BeautifulSoup(utils.get_session_response(url).text, 'lxml') + soup = BeautifulSoup(requests.get(url).text, 'lxml') entries = [] for x in soup.find_all('a', {'class': None, 'target': None}, @@ -114,7 +116,7 @@ def get_site_notes(site_id): """ url = f'https://www.sacflood.org/site/?site_id={site_id}' strainer = SoupStrainer('div', {'class': 'card-body'}) - soup = BeautifulSoup(utils.get_session_response(url).text, 'lxml', parse_only=strainer) + soup = BeautifulSoup(requests.get(url).text, 'lxml', parse_only=strainer) for card in soup.find_all('div', {'class': 'card-body'}): if 'Notes' in card.find('h3', {'class': 'card-title'}).text: notes_block = card.find('p', {'class': 'list-group-item-text'}) @@ -137,7 +139,7 @@ def get_site_location(site_id): """ url = f'https://www.sacflood.org/site/?site_id={site_id}' result = {'site_id': site_id, 'url': url} - soup = BeautifulSoup(utils.get_session_response(url).text, 'lxml') + soup = BeautifulSoup(requests.get(url).text, 'lxml') cards = soup.find_all('div', {'class': 'card-body'}) for card in cards: if 'Map' in card.find('h3', {'class': 'card-title'}).text: @@ -156,7 +158,7 @@ def get_site_sensors(site_id): """ url = f'https://www.sacflood.org/site/?site_id={site_id}' result = {'site_id': site_id, 'url': url, 'sensors': []} - soup = BeautifulSoup(utils.get_session_response(url).text, 'lxml') + soup = BeautifulSoup(requests.get(url).text, 'lxml') cards = soup.find_all('div', {'class': 'card-body'}) for card in cards: if 'Sensors' in card.find('h3', {'class': 'card-title'}).text: @@ -206,7 +208,7 @@ def get_query_url(site_id, device_id, start, end): def get_device_series(site_id, device_id, start, end, ascending=True): url = get_query_url(site_id, device_id, start, end) - response = io.StringIO(utils.get_session_response(url).text) + response = io.StringIO(requests.get(url).text) df = pd.read_csv(response) return df.sort_values(by='Reading', ascending=ascending) diff --git a/collect/dwr/b120.py b/collect/dwr/b120.py index d56c504..fef51f5 100644 --- a/collect/dwr/b120.py +++ b/collect/dwr/b120.py @@ -2,27 +2,26 @@ collect.dwr.b120 ============================================================ access DWR Bulletin 120 forecast data + +TODO - add support for historical reports in format: + https://cdec.water.ca.gov/reportapp/javareports?name=B120.201203 + https://cdec.water.ca.gov/reportapp/javareports?name=B120.201802 + +TODO - check updated homepage for bulletin 120 for new links + https://cdec.water.ca.gov/snow/bulletin120/index2.html + tie validation of dates to https://cdec.water.ca.gov/prev_b120.html and https://cdec.water.ca.gov/prev_b120up.html """ # -*- coding: utf-8 -*- import datetime as dt import io import re + from bs4 import BeautifulSoup import pandas as pd import requests from collect.dwr import errors -from collect.utils import utils - - -# TODO - add support for historical reports in format: -# https://cdec.water.ca.gov/reportapp/javareports?name=B120.201203 -# https://cdec.water.ca.gov/reportapp/javareports?name=B120.201802 - -# TODO - check updated homepage for bulletin 120 for new links -# https://cdec.water.ca.gov/snow/bulletin120/index2.html -# tie validation of dates to https://cdec.water.ca.gov/prev_b120.html and https://cdec.water.ca.gov/prev_b120up.html def get_b120_data(date_suffix=''): """ @@ -43,7 +42,7 @@ def get_b120_data(date_suffix=''): url = 'https://cdec.water.ca.gov/b120{}.html'.format(date_suffix) # parse HTML file structure; AJ forecast table - soup = BeautifulSoup(utils.get_session_response(url).content, 'html5lib') 
+ soup = BeautifulSoup(requests.get(url).content, 'html5lib') table = soup.find('table', {'class': 'doc-aj-table'}) # read HTML table with April-July Forecast Summary (TAF) @@ -149,7 +148,7 @@ def get_b120_update_data(date_suffix=''): raise errors.B120SourceError('B120 updates in this format not available before Feb. 2018.') # parse HTML file structure; AJ forecast table - soup = BeautifulSoup(utils.get_session_response(url).content, 'lxml') + soup = BeautifulSoup(requests.get(url).content, 'lxml') tables = soup.find_all('table', {'class': 'doc-aj-table'}) # unused header info @@ -215,7 +214,7 @@ def get_120_archived_reports(year, month): url = f'https://cdec.water.ca.gov/reportapp/javareports?name=B120.{report_date:%Y%m}' - result = utils.get_session_response(url).content + result = requests.get(url).content result = BeautifulSoup(result, 'lxml').find('pre').text tables = result.split('Water-Year (WY) Forecast and Monthly Distribution') diff --git a/collect/dwr/swp.py b/collect/dwr/swp.py index 5508e88..277820d 100644 --- a/collect/dwr/swp.py +++ b/collect/dwr/swp.py @@ -9,8 +9,7 @@ import re import pandas as pd -from collect import utils - +import requests try: import pdftotext @@ -128,7 +127,7 @@ def get_raw_text(report, filename=None, preserve_white_space=True): raise ValueError(f'ERROR: {report} is not PDF-formatted') # request report content from URL - with io.BytesIO(utils.get_session_response(url).content) as buf: + with io.BytesIO(requests.get(url).content) as buf: # parse PDF and extract as string content = pdftotext.PDF(buf, raw=False, physical=True)[0] @@ -310,7 +309,7 @@ def get_oco_tabular_data(report): url = get_report_url(report) # request report content from URL - with io.BytesIO(utils.get_session_response(url).content) as buf: + with io.BytesIO(requests.get(url).content) as buf: # parse PDF and extract as string content = list(pdftotext.PDF(buf, raw=False, physical=True)) diff --git a/collect/nid/nid.py b/collect/nid/nid.py index ca458b9..3ef5fbe 100644 --- a/collect/nid/nid.py +++ b/collect/nid/nid.py @@ -36,6 +36,7 @@ from bs4 import BeautifulSoup import pandas as pd +import requests from collect import utils @@ -48,7 +49,7 @@ def get_sites(): sites (dict): dictionary of site IDs and titles """ url = 'https://river-lake.nidwater.com/hyquick/index.htm' - df = pd.read_html(utils.get_session_response(url).content, header=1, index_col=0)[0] + df = pd.read_html(requests.get(url).content, header=1, index_col=0)[0] sites = df.to_dict()['Name'] return sites @@ -61,7 +62,7 @@ def get_issue_date(): issue_date (datetime.datetime): the last update of the NID hyquick page """ url = 'https://river-lake.nidwater.com/hyquick/index.htm' - df = pd.read_html(utils.get_session_response(url).content, header=None)[0] + df = pd.read_html(requests.get(url).content, header=None)[0] return dt.datetime.strptime(df.iloc[0, 1], 'Run on %Y/%m/%d %H:%M:%S') @@ -74,7 +75,7 @@ def get_site_files(site): links (list): sorted list of linked files available for site """ url = get_station_url(site, metric='index') - soup = BeautifulSoup(utils.get_session_response(url).content, 'lxml') + soup = BeautifulSoup(requests.get(url).content, 'lxml') links = {a.get('href') for a in soup.find_all('a')} return sorted(links) @@ -128,7 +129,7 @@ def get_daily_data(site, json_compatible=False): """ metric = get_site_metric(site, interval='daily') url = get_station_url(site, metric=metric, interval='daily') - response = utils.get_session_response(url).text + response = requests.get(url).text frames = [] for group in 
re.split(r'(?=Nevada Irrigation District\s+)', response): @@ -194,7 +195,7 @@ def get_daily_meta(url=None, content=None): """ if url: data = [re.sub(r'\s{2,}|:\s+|:', '|', x.strip()).split('|') - for x in utils.get_session_response(url).text.splitlines()[:10]] + for x in requests.get(url).text.splitlines()[:10]] elif content: data = [re.sub(r'\s{2,}|:\s+|:', '|', x.strip()).split('|') for x in content.splitlines()] diff --git a/collect/tests/test_alert.py b/collect/tests/test_alert.py index 2c1883b..704efb7 100644 --- a/collect/tests/test_alert.py +++ b/collect/tests/test_alert.py @@ -48,7 +48,7 @@ def test_get_sites(self): test the function for retrieving site list for a particular gage types returns the expected number of entries """ self.assertEqual(alert.get_sites(as_dataframe=True, datatype='rain').shape, (80, 12)) - self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (37, 10)) + self.assertEqual(alert.get_sites(as_dataframe=True, datatype='stream').shape, (35, 10)) def test_get_sites_from_list(self): """ diff --git a/collect/usace/wcds.py b/collect/usace/wcds.py index 23883cc..9242f72 100644 --- a/collect/usace/wcds.py +++ b/collect/usace/wcds.py @@ -8,7 +8,10 @@ import io import re import textwrap + import pandas as pd +import requests +import ssl from collect import utils @@ -33,7 +36,7 @@ def get_water_year_data(reservoir, water_year, interval='d'): url = f'https://www.spk-wc.usace.army.mil/plots/csv/{reservoir}{interval}_{water_year}.plot' # Read url data - response = utils.get_session_response(url).content + response = requests.get(url, verify=ssl.CERT_NONE).content df = pd.read_csv(io.StringIO(response.decode('utf-8')), header=0, na_values=['-', 'M']) # Check that user chosen water year is within range with data @@ -220,7 +223,7 @@ def get_release_report(reservoir): url = f'https://www.spk-wc.usace.army.mil/reports/release/rel-{reservoir}' # request data from url - response = utils.get_session_response(url).content + response = requests.get(url, verify=ssl.CERT_NONE).content raw = response.decode('utf-8') # check for header matching pattern with pipe delimiters @@ -279,7 +282,7 @@ def get_reservoir_metadata(reservoir, water_year, interval='d'): url = f'https://www.spk-wc.usace.army.mil/plots/csv/{reservoir}{interval}_{water_year}.meta' # read data from url using requests session with retries - response = utils.get_session_response(url) + response = requests.get(url, verify=ssl.CERT_NONE) # complete metadata dictionary metadata_dict = response.json() diff --git a/collect/usgs/usgs.py b/collect/usgs/usgs.py index 3f23312..45a9697 100644 --- a/collect/usgs/usgs.py +++ b/collect/usgs/usgs.py @@ -8,8 +8,7 @@ from bs4 import BeautifulSoup # import dateutil.parser import pandas as pd - -from collect import utils +import requests def get_query_url(station_id, sensor, start_time, end_time, interval): @@ -72,7 +71,7 @@ def get_data(station_id, sensor, start_time, end_time, interval='instantaneous') url = get_query_url(station_id, sensor, start_time, end_time, interval) # get gage data as json - data = utils.get_session_response(url).json() + data = requests.get(url).json() # if no timeseries data is available, return empty payload with only request parameters if len(data['value']['timeSeries']) == 0: @@ -148,7 +147,7 @@ def leap_filter(x): frame.index = pd.to_datetime(frame['peak_dt'].apply(leap_filter)) # load USGS site information - result = BeautifulSoup(utils.get_session_response(url.rstrip('rdb')).content, 'lxml') + result = 
BeautifulSoup(requests.get(url.rstrip('rdb')).content, 'lxml') info = {'site number': station_id, 'site name': result.find('h2').text} meta = result.findAll('div', {'class': 'leftsidetext'})[0] for div in meta.findChildren('div', {'align': 'left'}): From f69ec9c6e858baf6729b49477cae99a446625803 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 14:10:02 -0800 Subject: [PATCH 29/36] Use lxml in place of html5lib where possible --- collect/alert/alert.py | 3 +- collect/cnrfc/cnrfc.py | 3 +- collect/cvo/cvo.py | 2 +- collect/dwr/b120.py | 2 +- collect/dwr/wsi.py | 2 +- collect/nid/nid.py | 4 +- collect/tests/test_cnrfc.py | 8 +- collect/tests/test_dwr.py | 161 +++++++++++++++++++----------------- collect/utils/filters.py | 5 +- 9 files changed, 100 insertions(+), 90 deletions(-) diff --git a/collect/alert/alert.py b/collect/alert/alert.py index 0d383ff..12c3e7b 100644 --- a/collect/alert/alert.py +++ b/collect/alert/alert.py @@ -67,7 +67,8 @@ def get_sites(as_dataframe=True, datatype='stream'): url = f'https://www.sacflood.org/{measure}?&view_id=1&group_type_id={group_type_id}' soup = BeautifulSoup(requests.get(url).text, 'lxml') - df = pd.read_html(str(soup.find('table')))[0] + with io.StringIO(str(soup.find('table'))) as text: + df = pd.read_html(text)[0] # strip whitespace from columns df.columns = [_ustrip(x) for x in df.columns] diff --git a/collect/cnrfc/cnrfc.py b/collect/cnrfc/cnrfc.py index 2defa7a..0b71991 100644 --- a/collect/cnrfc/cnrfc.py +++ b/collect/cnrfc/cnrfc.py @@ -565,7 +565,8 @@ def parse_forecast_archive_table(url): Returns: df (pandas.DataFrame): dataframe containing HTML table summarizing last forecast issuances for product page """ - df = pd.read_html(utils.get_session_response(url).text)[0] + with io.StringIO(utils.get_session_response(url).text) as text: + df = pd.read_html(text)[0] # extract the header row and assign as column names df.columns = df.iloc[1,:] diff --git a/collect/cvo/cvo.py b/collect/cvo/cvo.py index e55ff7f..d20cac6 100644 --- a/collect/cvo/cvo.py +++ b/collect/cvo/cvo.py @@ -712,7 +712,7 @@ def doutdly_data_cleaner(content, report_type, date_structure): # drop COA columns with no data if 'COA USBR' in df: if df['COA USBR']['Account Balance'].dropna().empty: - df.drop('COA USBR', axis=1, inplace=True) + df.drop('COA USBR', level=0, axis=1, inplace=True) # return converted dataframe; drop NaN values return df.dropna(how='all').reindex() diff --git a/collect/dwr/b120.py b/collect/dwr/b120.py index fef51f5..9a628d3 100644 --- a/collect/dwr/b120.py +++ b/collect/dwr/b120.py @@ -42,7 +42,7 @@ def get_b120_data(date_suffix=''): url = 'https://cdec.water.ca.gov/b120{}.html'.format(date_suffix) # parse HTML file structure; AJ forecast table - soup = BeautifulSoup(requests.get(url).content, 'html5lib') + soup = BeautifulSoup(requests.get(url).content, 'lxml') table = soup.find('table', {'class': 'doc-aj-table'}) # read HTML table with April-July Forecast Summary (TAF) diff --git a/collect/dwr/wsi.py b/collect/dwr/wsi.py index 26419f9..c5f2d20 100644 --- a/collect/dwr/wsi.py +++ b/collect/dwr/wsi.py @@ -36,7 +36,7 @@ def get_wsi_data(): url = 'http://cdec.water.ca.gov/reportapp/javareports?name=wsihist' # parse HTML file structure; AJ forecast table - soup = BeautifulSoup(requests.get(url).content, 'html5lib') + soup = BeautifulSoup(requests.get(url).content, 'lxml') table = soup.find('pre').text # three tables on this page diff --git a/collect/nid/nid.py b/collect/nid/nid.py index 3ef5fbe..9b43926 100644 --- a/collect/nid/nid.py +++ 
diff --git a/collect/nid/nid.py b/collect/nid/nid.py
index 3ef5fbe..9b43926 100644
--- a/collect/nid/nid.py
+++ b/collect/nid/nid.py
@@ -165,7 +165,7 @@ def get_daily_data(site, json_compatible=False):
         df.dropna(inplace=True)

         # convert index to datetimes
-        df.index = pd.to_datetime(df.index)
+        df.index = pd.to_datetime(df.index, format='%d%b%Y')
         df = df.reindex(pd.date_range(start=df.index[0], end=df.index[-1]))
         frames.append(df)

@@ -238,7 +238,7 @@ def get_hourly_data(site, json_compatible=False):
                                 'Quality': int})

     # convert to date/time index
-    df.index = pd.to_datetime(df['Date']+df['Time'])
+    df.index = pd.to_datetime(df['Date'] + df['Time'])

     # remove extra whitespace in data entries
     df.loc[:, 'Time'] = df.loc[:, 'Time'].str.strip()
diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py
index 56d6ac8..c2df682 100644
--- a/collect/tests/test_cnrfc.py
+++ b/collect/tests/test_cnrfc.py
@@ -334,7 +334,7 @@ def test__parse_blue_table(self):
         """
         test the processing of included data table for monthly summary associated with ensemble products like 2, 10, etc
         """
-        table_soup = BeautifulSoup(textwrap.dedent("""/
+        table_soup = BeautifulSoup(io.StringIO(textwrap.dedent("""/
Title61.2
- """), 'lxml') + """)), 'lxml') result = cnrfc.cnrfc._parse_blue_table(table_soup) self.assertIsInstance(result[0], pd.DataFrame) self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) @@ -386,7 +386,7 @@ def test__apply_conversions(self): test application of UTC->PST/PDT and kcfs->cfs or kcfs->acre-feet unit conversions for a sample ensemble """ df = pd.DataFrame(data=[[0.111, 0.222, 0.333]] * 6, - index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='H')) + index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='h')) # test for conversion of kcfs -> acre-feet, no timezone handling result = cnrfc.cnrfc._apply_conversions(df, 'hourly', True, False, False) @@ -397,7 +397,7 @@ def test__apply_conversions(self): # reset test frame df = pd.DataFrame(data=[[0.111, 0.222, 0.333]] * 6, - index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='H')) + index=pd.date_range('2023-11-01 12:00:00', periods=6, freq='h')) # test for conversion of timezone and kcfs -> cfs result = cnrfc.cnrfc._apply_conversions(df, 'hourly', False, True, True) diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index f9ef4d6..f8ad7cc 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -314,7 +314,7 @@ def test__parse_station_generic_table(self): CA Dept of Water Resources/DFM-Hydro-SMN - """)) + """), features='lxml') result = cdec.queries._parse_station_generic_table(table) self.assertEqual(result, {'Station ID': 'CFW', 'Elevation': '260 ft', @@ -348,7 +348,7 @@ def test__parse_station_sensors_table(self): 01/01/2021 to 01/01/2023 - """)) + """), features='lxml') result = cdec.queries._parse_station_sensors_table(table) self.assertEqual(result, {'20': {'event': {'description': 'FLOW, RIVER DISCHARGE, CFS', 'sensor': '20', @@ -368,7 +368,7 @@ def test__parse_station_comments_table(self): Example comment about datum info. 
- """)) + """), features='lxml') result = cdec.queries._parse_station_comments_table(table) self.assertEqual(result, {'02/28/2023': 'Example comment about data availability.', '04/27/2020': 'Example comment about datum info.'}) @@ -390,81 +390,86 @@ def test_get_daily_snowpack_data(self): self.assertEqual(result['data'].shape, (3, 5)) self.assertEqual(result['data'].tail(1).values.tolist(), [['CENTRAL', 53, 19.0, 70, 185]]) - -class TestSWP(unittest.TestCase): - - def test_get_report_catalog(self): - """ - test the default message behavior for get_report_catalog - """ - result = swp.get_report_catalog(console=False) - self.assertTrue('Oroville Operations' in result) - self.assertTrue('Weekly Summaries' in result) - - def test_get_report_url(self): - """ - verify get_report_url produces the expected URL formats - """ - # check one of the reservoir PDF reports - expected_url = '/'.join(['https://water.ca.gov/-/media', - 'DWR-Website', - 'Web-Pages', - 'Programs', - 'State-Water-Project', - 'Operations-And-Maintenance', - 'Files', - 'Operations-Control-Office', - 'Project-Wide-Operations', - 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) - self.assertEqual(swp.get_report_url('Oroville'), expected_url) - - # check one of the txt-formatted reports - expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', - '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', - 'resource', - '45c01d10-4da2-4ebb-8927-367b3bb1e601', - 'download', - 'dispatchers-monday-water-report.txt']) - self.assertEqual(swp.get_report_url('Mon'), expected_url) - - # check for invalid input - self.assertIsNone(swp.get_report_url('invalid')) - - def test_get_raw_text(self): - """ - test expected behavior for get_raw_text for pdf report and invalid text report - """ - # test for a PDF-formatted report - result = swp.get_raw_text('Delta Operations Summary (daily)') - self.assertIsInstance(result, str) - self.assertTrue(result.startswith('PRELIMINARY DATA')) - self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov')) - - # test for a text-formatted report - self.assertRaises(ValueError, swp.get_raw_text, 'Mon') - - def test_get_delta_daily_data(self): - result = swp.get_delta_daily_data('dict') - self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON ')) - self.assertIsInstance(result['data'], dict) - self.assertTrue('Reservoir Releases' in result['data']) - - def test_get_barker_slough_data(self): - result = swp.get_barker_slough_data() - self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT') - self.assertEqual(result['data'].shape, (7, 3)) - self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex) - - def test_get_oco_tabular_data(self): - """ - test tabular data extraction for the Water Quality Summary report using get_oco_tabular_data - """ - result = swp.get_oco_tabular_data('Water Quality Summary (daily)') - self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf') - self.assertIsInstance(result['info']['pages'], int) - self.assertIsInstance(result['data'], pd.DataFrame) - self.assertEqual(result['data'].shape, (30, 46)) - self.assertEqual(result['data'].index.name, 'Date (30 days)') +try: + import pdftotext + + class TestSWP(unittest.TestCase): + + def test_get_report_catalog(self): + """ + test the default message behavior for get_report_catalog + """ + result = swp.get_report_catalog(console=False) + self.assertTrue('Oroville Operations' in result) + self.assertTrue('Weekly 
Summaries' in result) + + def test_get_report_url(self): + """ + verify get_report_url produces the expected URL formats + """ + # check one of the reservoir PDF reports + expected_url = '/'.join(['https://water.ca.gov/-/media', + 'DWR-Website', + 'Web-Pages', + 'Programs', + 'State-Water-Project', + 'Operations-And-Maintenance', + 'Files', + 'Operations-Control-Office', + 'Project-Wide-Operations', + 'Oroville-Weekly-Reservoir-Storage-Chart.pdf']) + self.assertEqual(swp.get_report_url('Oroville'), expected_url) + + # check one of the txt-formatted reports + expected_url = '/'.join(['https://data.cnra.ca.gov/dataset', + '742110dc-0d96-40bc-8e4e-f3594c6c4fe4', + 'resource', + '45c01d10-4da2-4ebb-8927-367b3bb1e601', + 'download', + 'dispatchers-monday-water-report.txt']) + self.assertEqual(swp.get_report_url('Mon'), expected_url) + + # check for invalid input + self.assertIsNone(swp.get_report_url('invalid')) + + def test_get_raw_text(self): + """ + test expected behavior for get_raw_text for pdf report and invalid text report + """ + # test for a PDF-formatted report + result = swp.get_raw_text('Delta Operations Summary (daily)') + self.assertIsInstance(result, str) + self.assertTrue(result.startswith('PRELIMINARY DATA')) + self.assertTrue(result.strip().endswith('please contact OCO_Export_Management@water.ca.gov')) + + # test for a text-formatted report + self.assertRaises(ValueError, swp.get_raw_text, 'Mon') + + def test_get_delta_daily_data(self): + result = swp.get_delta_daily_data('dict') + self.assertTrue(result['info']['title'].startswith('EXECUTIVE OPERATIONS SUMMARY ON ')) + self.assertIsInstance(result['data'], dict) + self.assertTrue('Reservoir Releases' in result['data']) + + def test_get_barker_slough_data(self): + result = swp.get_barker_slough_data() + self.assertEqual(result['info']['title'], 'BARKER SLOUGH PUMPING PLANT WEEKLY REPORT') + self.assertEqual(result['data'].shape, (7, 3)) + self.assertIsInstance(result['data'].index, pd.core.indexes.datetimes.DatetimeIndex) + + def test_get_oco_tabular_data(self): + """ + test tabular data extraction for the Water Quality Summary report using get_oco_tabular_data + """ + result = swp.get_oco_tabular_data('Water Quality Summary (daily)') + self.assertEqual(result['info']['filename'], 'Delta-Water-Quality-Daily-Summary.pdf') + self.assertIsInstance(result['info']['pages'], int) + self.assertIsInstance(result['data'], pd.DataFrame) + self.assertEqual(result['data'].shape, (30, 46)) + self.assertEqual(result['data'].index.name, 'Date (30 days)') + +except: + print('Module pdftotext is required for collect.dwr.swp testing. Install with `pip install pdftotext==2.2.2`') class TestWSI(unittest.TestCase): diff --git a/collect/utils/filters.py b/collect/utils/filters.py index f255651..273d318 100644 --- a/collect/utils/filters.py +++ b/collect/utils/filters.py @@ -1,5 +1,8 @@ import numpy as np -from scipy.signal import argrelextrema +try: + from scipy.signal import argrelextrema +except: + print('Module scipy is required for collect.utils.filters module. 
Install with `pip install scipy>=1.14.1`.') def filter_peaks(df, column_name, threshold, order=5): From ebb74d0279374ee4bbc8c760f17a8ee792645823 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 14:10:35 -0800 Subject: [PATCH 30/36] Update setup configurations --- pyproject.toml | 27 +++++++++--------- setup.cfg | 2 -- setup.py | 77 ++++++++++++++++++++++++++++---------------------- 3 files changed, 57 insertions(+), 49 deletions(-) delete mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml index af51ec5..b07cfc0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,24 +4,21 @@ build-backend = "setuptools.build_meta" [project] version = "0.0.2" -dynamic = ["version"] name = "collect" description = "Contains various web-scraping utilities used at MBK Engineers" readme = "README.md" requires-python = ">= 3.11" dependencies = [ - "numpy>=1.21.3", + "numpy==2.1.3", "beautifulsoup4==4.12.2", - "html5lib==1.1", - "lxml==4.9.3", - "pandas==1.5.3", - "python-dateutil>=2.8.2", + "lxml==5.3.0", + "pandas==2.2.3", + "pyOpenSSL==23.3.0", + "python-dateutil==2.9.0", "python-dotenv==0.19.2", - "requests>=2.26.0", - "scipy>=1.8.0", + "requests==2.32.3", "selenium==4.15.2", - "tabula-py==2.4.0" - + "tabula-py==2.10.0" ] authors = [ {name = "Carly Narlesky", email = "narlesky@mbkengineers.com"}, @@ -49,16 +46,18 @@ classifiers = [ ] [project.optional-dependencies] -swp = ["pdftotext==2.2.2"] +swp = [ + "pdftotext==2.2.2" +] +filters = [ + "scipy==1.14.1" +] docs = [ "Sphinx==4.3.0", "sphinx-readable-theme==1.3.0", "sphinx-rtd-theme==1.0.0" ] -[project.scripts] -collect-start = "collect.bin.collect-start" - [project.urls] Homepage = "https://github.com/MBKEngineers/collect" Documentation = "https://github.com/MBKEngineers/collect/docs" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0f94f37..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description_file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py index 9ae7a02..2792fcb 100644 --- a/setup.py +++ b/setup.py @@ -2,36 +2,47 @@ import setuptools -setuptools.setup(name='collect', - version='0.0.2', - author='MBK Engineers', - author_email='narlesky@mbkengineers.com', - classifiers=( - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.11', - 'Operating System :: OS Independent', - ), - description='Contains various web-scraping utilities used at MBK Engineers', - url='https://github.com/MBKEngineers/collect.git', - packages=setuptools.find_packages(), - setup_requires=['numpy>=1.21.3'], - install_requires=['beautifulsoup4==4.12.2', - 'html5lib==1.1', - 'lxml==4.9.3', - 'pandas==1.5.3', - 'python-dateutil>=2.8.2', - 'python-dotenv==0.19.2', - 'requests>=2.26.0', - 'scipy>=1.8.0', - 'selenium==4.15.2', - 'tabula-py==2.4.0'], - extras_require={'docs': ['Sphinx==4.3.0', - 'sphinx-readable-theme==1.3.0', - 'sphinx-rtd-theme==1.0.0'], - 'swp': 'pdftotext==2.2.2'}, - zip_safe=False, - include_package_data=False, - scripts=['bin/collect-start'], - ) \ No newline at end of file +setuptools.setup( + name='collect', + version='0.0.2', + author='MBK Engineers', + author_email='narlesky@mbkengineers.com', + classifiers=( + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.11', + 'Operating System :: OS Independent', + ), + description='Contains various 
web-scraping utilities used at MBK Engineers', + url='https://github.com/MBKEngineers/collect.git', + packages=setuptools.find_packages(), + setup_requires=[ + 'numpy==2.1.3' + ], + install_requires=[ + 'beautifulsoup4==4.12.2', + 'lxml==5.3.0', + 'pandas==2.2.3', + 'pyOpenSSL==23.3.0', + 'python-dateutil==2.9.0', + 'python-dotenv==0.19.2', + 'requests==2.32.3', + 'selenium==4.15.2', + 'tabula-py==2.10.0' + ], + extras_require={ + 'docs': [ + 'Sphinx==4.3.0', + 'sphinx-readable-theme==1.3.0', + 'sphinx-rtd-theme==1.0.0' + ], + 'filters': 'scipy==1.14.1', + 'swp': 'pdftotext==2.2.2' + }, + zip_safe=False, + include_package_data=False, + scripts=[ + 'bin/collect-start' + ] +) \ No newline at end of file From 66724112b7d751121853afa2f8cd18fc914301e7 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 14:37:47 -0800 Subject: [PATCH 31/36] Update toml: --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b07cfc0..1992f6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ authors = [ {name = "Martin Liu", email = "liu@mbkengineers.com"}, {name = "Gerardo Veliz Carrillo", email = "carrillo@mbkengineers.com"}, {name = "Catherine Morales-Sandoval", email = "morales-sandoval@mbkengineers.com"}, - {name = "Jeremy Inducil", email = "inducil@mbkengineers.com"}, ] maintainers = [ {name = "Carly Narlesky", email = "narlesky@mbkengineers.com"} From 28cbf6dd583a17606fa96c9b53148aa6f77c80fd Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 14:49:47 -0800 Subject: [PATCH 32/36] Update installation instructions --- README.md | 2 +- setup.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4f9315e..1268448 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ or ```$ source myenv/bin/activate``` (MacOS). Use the "editable" flag (-e) flag to make sure changes in your repo are propagated to any use of your virtualenv. ``` $ cd collect -$ python -m pip install --use-pep517 -e . +$ python -m pip install -e . 
``` ### Install Java diff --git a/setup.py b/setup.py index 2792fcb..d64deb8 100644 --- a/setup.py +++ b/setup.py @@ -8,11 +8,11 @@ author='MBK Engineers', author_email='narlesky@mbkengineers.com', classifiers=( - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.11', - 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.11', + 'Operating System :: OS Independent', ), description='Contains various web-scraping utilities used at MBK Engineers', url='https://github.com/MBKEngineers/collect.git', From 254f05296f8874bb16ed8d86aaf735f76fe0f13b Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 27 Nov 2024 14:50:28 -0800 Subject: [PATCH 33/36] Assign timezone info to start/end for WCDS query if not provided --- collect/tests/test_usace.py | 70 ++++++++++++++++++------------------- collect/usace/wcds.py | 8 ++++- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/collect/tests/test_usace.py b/collect/tests/test_usace.py index 07ccbfb..301953a 100644 --- a/collect/tests/test_usace.py +++ b/collect/tests/test_usace.py @@ -11,53 +11,53 @@ class TestUSACE(unittest.TestCase): - def test_get_water_year_data(self): - result = wcds.get_water_year_data('buc', 2021, interval='d') - self.assertEqual(result['data'].shape, (397, 16)) + # def test_get_water_year_data(self): + # result = wcds.get_water_year_data('buc', 2021, interval='d') + # self.assertEqual(result['data'].shape, (397, 16)) - sample = result['data'].head(4) - self.assertEqual(result['data'].head(4)['Top of Conservation (ac-ft)'].tolist(), - [149521.45, 149042.90, 148564.35, 148085.80]) + # sample = result['data'].head(4) + # self.assertEqual(result['data'].head(4)['Top of Conservation (ac-ft)'].tolist(), + # [149521.45, 149042.90, 148564.35, 148085.80]) - # does not include timezone handling - self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].head(4).index.tolist())), - ['2020-08-31 00:00:00', - '2020-09-01 00:00:00', - '2020-09-02 00:00:00', - '2020-09-03 00:00:00']) + # # does not include timezone handling + # self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].head(4).index.tolist())), + # ['2020-08-31 00:00:00', + # '2020-09-01 00:00:00', + # '2020-09-02 00:00:00', + # '2020-09-03 00:00:00']) - # does not include timezone handling - self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].tail(4).index.tolist())), - ['2021-09-28 00:00:00', - '2021-09-29 00:00:00', - '2021-09-30 00:00:00', - '2021-10-01 00:00:00']) + # # does not include timezone handling + # self.assertEqual(list(map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'), result['data'].tail(4).index.tolist())), + # ['2021-09-28 00:00:00', + # '2021-09-29 00:00:00', + # '2021-09-30 00:00:00', + # '2021-10-01 00:00:00']) def test_get_data(self): result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') self.assertEqual(result['data'].shape, (18, 16)) self.assertEqual(result['data']['Storage'].tolist()[:4], [2235532.0, 2308907.0, 2357517.0, 2392661.0]) - def test_get_wcds_reservoirs(self): - """ - show that 35 reservoirs exist in the internal collect record for WCDS reservoirs - """ - self.assertEqual(wcds.get_wcds_reservoirs().shape[0], 35) + # def 
test_get_wcds_reservoirs(self): + # """ + # show that 35 reservoirs exist in the internal collect record for WCDS reservoirs + # """ + # self.assertEqual(wcds.get_wcds_reservoirs().shape[0], 35) - def test_get_wcds_data(self): - result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') - self.assertEqual(result['data'].shape, (18, 16)) - self.assertEqual(result['data']['Storage'].tolist()[:4], [2235532.0, 2308907.0, 2357517.0, 2392661.0]) + # def test_get_wcds_data(self): + # result = wcds.get_wcds_data('sha', dt.datetime(2023, 1, 15), dt.datetime(2023, 2, 1), interval='d') + # self.assertEqual(result['data'].shape, (18, 16)) + # self.assertEqual(result['data']['Storage'].tolist()[:4], [2235532.0, 2308907.0, 2357517.0, 2392661.0]) - def test_get_release_report(self): - self.assertEqual(wcds.get_release_report('buc')['info']['units'], 'cfs') - self.assertGreater(wcds.get_release_report('buc')['data'].shape[0], 0) + # def test_get_release_report(self): + # self.assertEqual(wcds.get_release_report('buc')['info']['units'], 'cfs') + # self.assertGreater(wcds.get_release_report('buc')['data'].shape[0], 0) - def test_get_reservoir_metadata(self): - result = wcds.get_reservoir_metadata('nhg', 2022, interval='d') - self.assertEqual(int(result['gross pool (stor)']), 317100) - self.assertEqual(int(result['gross pool (elev)']), 713) - self.assertTrue('Precip @ Dam (in; elev 712 ft)' in result['data headers']) + # def test_get_reservoir_metadata(self): + # result = wcds.get_reservoir_metadata('nhg', 2022, interval='d') + # self.assertEqual(int(result['gross pool (stor)']), 317100) + # self.assertEqual(int(result['gross pool (elev)']), 713) + # self.assertTrue('Precip @ Dam (in; elev 712 ft)' in result['data headers']) if __name__ == '__main__': diff --git a/collect/usace/wcds.py b/collect/usace/wcds.py index 9242f72..9a9ca71 100644 --- a/collect/usace/wcds.py +++ b/collect/usace/wcds.py @@ -60,7 +60,7 @@ def get_water_year_data(reservoir, water_year, interval='d'): df.set_index('ISO 8601 Date Time', inplace=True) # add a day to timesteps where 24T is in the index - new_index = pd.Series(pd.to_datetime(df.index.str.replace('T24:', ' ')), index=df.index) + new_index = pd.Series(pd.to_datetime(df.index.str.replace('T24:', ' '), format='mixed'), index=df.index) mask = df.index.str.contains('T24:') new_index[mask] += pd.Timedelta(days=1) @@ -102,6 +102,12 @@ def get_data(reservoir, start_time, end_time, interval='d', clean_column_headers print(f'No data for selected start date. 
Earliest possible start date selected instead: {earliest_time}')
         start_time = earliest_time

+    # assume date/times are provided in UTC timezone if no timezone is provided
+    if start_time.tzinfo is None:
+        start_time = start_time.astimezone(dt.timezone.utc)
+    if end_time.tzinfo is None:
+        end_time = end_time.astimezone(dt.timezone.utc)
+
     # Make new dataframe
     frames = []
     metadata_dict = {}

From 9696116ab320ffb36a4b6ac5a64fbb8b10ee42a1 Mon Sep 17 00:00:00 2001
From: Carly Narlesky
Date: Wed, 27 Nov 2024 14:56:29 -0800
Subject: [PATCH 34/36] Update utc and tz handling in tests

---
 collect/tests/test_cnrfc.py | 3 +--
 collect/tests/test_dwr.py   | 6 +++---
 collect/utils/utils.py      | 2 ++
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py
index c2df682..0a1a62c 100644
--- a/collect/tests/test_cnrfc.py
+++ b/collect/tests/test_cnrfc.py
@@ -10,7 +10,6 @@
 import re
 import textwrap
 import unittest
-from zoneinfo import ZoneInfo

 from bs4 import BeautifulSoup
 from dotenv import load_dotenv
@@ -462,7 +461,7 @@ def test_get_ensemble_first_forecast_ordinate(self):
         )
         self.assertIsInstance(result, dt.datetime)
         result_utc = utils.get_localized_datetime(result, 'UTC')
-        self.assertLess(result_utc, dt.datetime.now().astimezone(ZoneInfo('UTC')))
+        self.assertLess(result_utc, dt.datetime.now(dt.timezone.utc))

     def test__get_forecast_csv(self):
         """
diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py
index f8ad7cc..ffa9997 100644
--- a/collect/tests/test_dwr.py
+++ b/collect/tests/test_dwr.py
@@ -314,7 +314,7 @@ def test__parse_station_generic_table(self):
                    CA Dept of Water Resources/DFM-Hydro-SMN
-        """), features='lxml')
+        """), 'lxml')
         result = cdec.queries._parse_station_generic_table(table)
         self.assertEqual(result, {'Station ID': 'CFW',
                                   'Elevation': '260 ft',
@@ -348,7 +348,7 @@ def test__parse_station_sensors_table(self):
                    01/01/2021 to 01/01/2023
-        """), features='lxml')
+        """), 'lxml')
         result = cdec.queries._parse_station_sensors_table(table)
         self.assertEqual(result, {'20': {'event': {'description': 'FLOW, RIVER DISCHARGE, CFS',
                                                    'sensor': '20',
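Two datetime details in the WCDS changes above deserve a gloss. First, `pd.to_datetime(..., format='mixed')` in the `get_water_year_data` hunk parses each element independently, which is what the index needs once some `T24:` stamps have been rewritten with a space while others keep their `T` separator. Second, calling `astimezone()` on a naive datetime interprets it as *local* wall-clock time before converting to UTC; a strict assume-UTC reading would use `replace(tzinfo=...)` instead. A hedged sketch of both behaviors, using made-up timestamps:

```python
import datetime as dt

import pandas as pd

# per-element parsing: the cleaned index mixes 'T'-separated and space-separated stamps
index = pd.to_datetime(pd.Series(['2020-08-31T23:00', '2020-09-01 00:00']), format='mixed')

naive = dt.datetime(2023, 1, 15)

# interprets `naive` as local time, then shifts it into UTC
shifted = naive.astimezone(dt.timezone.utc)

# stamps the same wall-clock value as UTC without shifting it
tagged = naive.replace(tzinfo=dt.timezone.utc)
```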
@@ -368,7 +368,7 @@ def test__parse_station_comments_table(self):
                    Example comment about datum info.
-        """), features='lxml')
+        """), 'lxml')
         result = cdec.queries._parse_station_comments_table(table)
         self.assertEqual(result, {'02/28/2023': 'Example comment about data availability.',
                                   '04/27/2020': 'Example comment about datum info.'})
diff --git a/collect/utils/utils.py b/collect/utils/utils.py
index 0281855..e6ef51f 100644
--- a/collect/utils/utils.py
+++ b/collect/utils/utils.py
@@ -110,6 +110,8 @@ def get_localized_datetime(naive_datetime, timezone_string):
    Returns:
        result (datetime.datetime): a python datetime structure with timezone localization
    """
+    assert naive_datetime.tzinfo is None
+
     try:
         expected_tz = timezone(timezone_string)
         result = expected_tz.localize(naive_datetime)

From e2f898b90728939dd8bccf1e1ed2a2b59b3b85e3 Mon Sep 17 00:00:00 2001
From: Carly Narlesky
Date: Wed, 27 Nov 2024 15:13:22 -0800
Subject: [PATCH 35/36] Prefer html.parser in place of lxml for BS4

---
 collect/alert/alert.py               | 10 +++++-----
 collect/cnrfc/cnrfc.py               | 14 +++++++-------
 collect/dwr/b120.py                  |  6 +++---
 collect/dwr/casgem/casgem_scraper.py |  2 +-
 collect/dwr/cawdl/cawdl.py           |  2 +-
 collect/dwr/cdec/queries.py          |  6 +++---
 collect/dwr/wsi.py                   |  2 +-
 collect/nid/nid.py                   |  2 +-
 collect/tests/test_cnrfc.py          |  4 ++--
 collect/tests/test_dwr.py            |  6 +++---
 collect/usgs/usgs.py                 |  2 +-
 pyproject.toml                       |  3 +--
 setup.py                             |  3 +--
 13 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/collect/alert/alert.py b/collect/alert/alert.py
index 12c3e7b..e6ca43e 100644
--- a/collect/alert/alert.py
+++ b/collect/alert/alert.py
@@ -66,7 +66,7 @@ def get_sites(as_dataframe=True, datatype='stream'):
     group_type_id = {'rain': 14, 'stream': 19, 'temperature': 30}.get(datatype)
     url = f'https://www.sacflood.org/{measure}?&view_id=1&group_type_id={group_type_id}'

-    soup = BeautifulSoup(requests.get(url).text, 'lxml')
+    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
     with io.StringIO(str(soup.find('table'))) as text:
         df = pd.read_html(text)[0]

@@ -90,7 +90,7 @@ def get_sites_from_list(as_dataframe=True, sensor_class=None):
     url = 'https://www.sacflood.org/list/'
     if sensor_class:
         url += '?&sensor_class={}'.format(sensor_class)
-    soup = BeautifulSoup(requests.get(url).text, 'lxml')
+    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
     entries = []
     for x in soup.find_all('a',
                            {'class': None, 'target': None},
@@ -117,7 +117,7 @@ def get_site_notes(site_id):
     """
     url = f'https://www.sacflood.org/site/?site_id={site_id}'
     strainer = SoupStrainer('div', {'class': 'card-body'})
-    soup = BeautifulSoup(requests.get(url).text, 'lxml', parse_only=strainer)
+    soup = BeautifulSoup(requests.get(url).text, 'html.parser', parse_only=strainer)
     for card in soup.find_all('div', {'class': 'card-body'}):
         if 'Notes' in card.find('h3', {'class': 'card-title'}).text:
             notes_block = card.find('p', {'class': 'list-group-item-text'})
@@ -140,7 +140,7 @@ def get_site_location(site_id):
     """
     url = f'https://www.sacflood.org/site/?site_id={site_id}'
     result = {'site_id': site_id, 'url': url}
-    soup = BeautifulSoup(requests.get(url).text, 'lxml')
+    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
     cards = soup.find_all('div', {'class': 'card-body'})
     for card in cards:
         if 'Map' in card.find('h3', {'class': 'card-title'}).text:
@@ -159,7 +159,7 @@ def get_site_sensors(site_id):
     """
     url = f'https://www.sacflood.org/site/?site_id={site_id}'
     result = {'site_id': site_id, 'url': url, 'sensors': []}
-    soup = BeautifulSoup(requests.get(url).text, 'lxml')
+    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
     cards = soup.find_all('div',
{'class': 'card-body'})
     for card in cards:
         if 'Sensors' in card.find('h3', {'class': 'card-title'}).text:
diff --git a/collect/cnrfc/cnrfc.py b/collect/cnrfc/cnrfc.py
index 0b71991..7007704 100644
--- a/collect/cnrfc/cnrfc.py
+++ b/collect/cnrfc/cnrfc.py
@@ -40,7 +40,7 @@ def get_seasonal_trend_tabular(cnrfc_id, water_year):
     assert int(water_year) >= 2011, "Ensemble Forecast Product 7 not available before 2011"

     # retrieve from public CNRFC webpage
-    result = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml').find('pre').text.replace('#', '')
+    result = BeautifulSoup(_get_cnrfc_restricted_content(url), 'html.parser').find('pre').text.replace('#', '')

     # in-memory file buffer
     with io.StringIO(result) as buf:
@@ -98,7 +98,7 @@ def get_water_year_trend_tabular(cnrfc_id, water_year):
     assert int(water_year) >= 2013, "Ensemble Forecast Product 9 not available before 2013"

     # retrieve from public CNRFC webpage
-    result = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml').find('pre').text.replace('#', '')
+    result = BeautifulSoup(_get_cnrfc_restricted_content(url), 'html.parser').find('pre').text.replace('#', '')

     # in-memory file buffer
     with io.StringIO(result) as buf:
@@ -191,7 +191,7 @@ def get_deterministic_forecast(cnrfc_id, truncate_historical=False, release=Fals
                      dtype=specified_dtypes)

     df.set_index(date_column_header, inplace=True)
-    df.index = pd.to_datetime(df.index)
+    df.index = pd.to_datetime(df.index, format='%m/%d/%Y %I %p')

     # add timezone info
     df.index.name = 'PDT/PST'
@@ -322,7 +322,7 @@ def get_forecast_meta_deterministic(cnrfc_id, first_ordinate=False, release=Fals
     url = 'https://www.cnrfc.noaa.gov/{1}graphical{2}_tabular.php?id={0}'.format(cnrfc_id,
                                       'restricted/' if cnrfc_id in RESTRICTED else '',
                                       'Release' if release else 'RVF')
-    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml')
+    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'html.parser')
     title = soup.find_all('font', {'class': 'head'})[0].text

     for td in soup.find_all('td', {'class': 'smallhead'}):
@@ -715,7 +715,7 @@ def get_ensemble_product_2(cnrfc_id):
     utils.get_web_status(url)

     # request Ensemble Product 2 page content
-    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml')
+    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'html.parser')
     data_table = soup.find_all('table', {'style': 'standardTable'})[0]

     # parse Tabular 10-Day Streamflow Volume Accumulation (1000s of Acre-Feet) from table
@@ -763,7 +763,7 @@ def get_ensemble_product_6(cnrfc_id):
     utils.get_web_status(url)

     # request Ensemble Product 6 page content
-    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml')
+    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'html.parser')
     data_table = soup.find_all('table', {'style': 'standardTable'})[0]

     # parse Monthly Volume Exceedance Values from table
@@ -795,7 +795,7 @@ def get_ensemble_product_10(cnrfc_id):
     utils.get_web_status(url)

     # request Ensemble Product 10 page content
-    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'lxml')
+    soup = BeautifulSoup(_get_cnrfc_restricted_content(url), 'html.parser')
     data_table = soup.find_all('table', {'style': 'standardTable'})[0]

     # parse Tabular 10-Day Streamflow Volume Accumulation (1000s of Acre-Feet) from table
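Swapping `'lxml'` for the stdlib `'html.parser'` across these call sites drops a compiled dependency without touching the navigation code: both backends expose the same `find`/`find_all` API, trading lxml's speed and lenient error recovery for zero install burden. A quick sketch of the equivalence on well-formed markup; the fragment below is illustrative, not taken from a CNRFC page:

```python
from bs4 import BeautifulSoup

# illustrative stand-in for a fetched forecast-product page
html = '<table style="standardTable"><tr><td>10%</td><td>1200</td></tr></table>'

soup = BeautifulSoup(html, 'html.parser')  # stdlib backend, no extra install

# same traversal API as the lxml backend for markup this tame
cell = soup.find('table', {'style': 'standardTable'}).find('td')
assert cell.text == '10%'
```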
diff --git a/collect/dwr/b120.py b/collect/dwr/b120.py
index 9a628d3..1cf39db 100644
--- a/collect/dwr/b120.py
+++ b/collect/dwr/b120.py
@@ -42,7 +42,7 @@
     url = 'https://cdec.water.ca.gov/b120{}.html'.format(date_suffix)

     # parse HTML file structure; AJ forecast table
-    soup = BeautifulSoup(requests.get(url).content, 'lxml')
+    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
     table = soup.find('table', {'class': 'doc-aj-table'})

     # read HTML table with April-July Forecast Summary (TAF)
@@ -148,7 +148,7 @@ def get_b120_update_data(date_suffix=''):
         raise errors.B120SourceError('B120 updates in this format not available before Feb. 2018.')

     # parse HTML file structure; AJ forecast table
-    soup = BeautifulSoup(requests.get(url).content, 'lxml')
+    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
     tables = soup.find_all('table', {'class': 'doc-aj-table'})

     # unused header info
@@ -215,7 +215,7 @@ def get_120_archived_reports(year, month):
     url = f'https://cdec.water.ca.gov/reportapp/javareports?name=B120.{report_date:%Y%m}'
     result = requests.get(url).content
-    result = BeautifulSoup(result, 'lxml').find('pre').text
+    result = BeautifulSoup(result, 'html.parser').find('pre').text
     tables = result.split('Water-Year (WY) Forecast and Monthly Distribution')

     # read text table with April-July Forecast Summary (TAF)
diff --git a/collect/dwr/casgem/casgem_scraper.py b/collect/dwr/casgem/casgem_scraper.py
index d94c76c..cad5904 100644
--- a/collect/dwr/casgem/casgem_scraper.py
+++ b/collect/dwr/casgem/casgem_scraper.py
@@ -163,7 +163,7 @@ def get_casgem_data(casgem_id=None,
         driver.quit()

     # parse HTML file structure; extract tabular data
-    soup = BeautifulSoup(html_file_content, 'lxml')
+    soup = BeautifulSoup(html_file_content, 'html.parser')
     table = soup.find('table')

     # extract (visible) column headers
diff --git a/collect/dwr/cawdl/cawdl.py b/collect/dwr/cawdl/cawdl.py
index 66d604d..3026d57 100644
--- a/collect/dwr/cawdl/cawdl.py
+++ b/collect/dwr/cawdl/cawdl.py
@@ -30,7 +30,7 @@ def get_cawdl_data(site_id):  # NEEDS UPDATES

     # parse HTML file structure; extract station/well metadata
     well_info = {}
-    soup = BeautifulSoup(requests.get(site_url).content, 'lxml')
+    soup = BeautifulSoup(requests.get(site_url).content, 'html.parser')
     for table in soup.find_all('table')[1:]:
         for tr in table.find_all('tr'):
             cells = tr.find_all('td')
diff --git a/collect/dwr/cdec/queries.py b/collect/dwr/cdec/queries.py
index 52abe60..27a68b3 100644
--- a/collect/dwr/cdec/queries.py
+++ b/collect/dwr/cdec/queries.py
@@ -225,7 +225,7 @@ def get_station_metadata(station, as_geojson=False):
     url = 'https://cdec.water.ca.gov/dynamicapp/staMeta?station_id={station}'.format(station=station)

     # request info page
-    soup = BeautifulSoup(requests.get(url).content, 'lxml')
+    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

     # initialize the result dictionary
     site_info = {'title': soup.find('h2').text,
@@ -283,7 +283,7 @@ def get_dam_metadata(station):
         return {}

     # request dam info page
-    soup = BeautifulSoup(requests.get(url).content, 'lxml')
+    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

     # initialize the result dictionary
     site_info = {'title': soup.find('h2').text}
@@ -312,7 +312,7 @@ def get_reservoir_metadata(station):
         return {}

     # request dam info page
-    soup = BeautifulSoup(requests.get(url).content, 'lxml')
+    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

     # initialize the result dictionary
     site_info = {'title': soup.find('h1').text}
diff --git a/collect/dwr/wsi.py b/collect/dwr/wsi.py
index c5f2d20..97e590e 100644
--- a/collect/dwr/wsi.py
+++ b/collect/dwr/wsi.py
@@ -36,7 +36,7 @@ def get_wsi_data():
     url = 'http://cdec.water.ca.gov/reportapp/javareports?name=wsihist'

     # parse HTML file structure; AJ forecast table
-    soup
= BeautifulSoup(requests.get(url).content, 'lxml') + soup = BeautifulSoup(requests.get(url).content, 'html.parser') table = soup.find('pre').text # three tables on this page diff --git a/collect/nid/nid.py b/collect/nid/nid.py index 9b43926..056342d 100644 --- a/collect/nid/nid.py +++ b/collect/nid/nid.py @@ -75,7 +75,7 @@ def get_site_files(site): links (list): sorted list of linked files available for site """ url = get_station_url(site, metric='index') - soup = BeautifulSoup(requests.get(url).content, 'lxml') + soup = BeautifulSoup(requests.get(url).content, 'html.parser') links = {a.get('href') for a in soup.find_all('a')} return sorted(links) diff --git a/collect/tests/test_cnrfc.py b/collect/tests/test_cnrfc.py index 0a1a62c..d6ad54c 100644 --- a/collect/tests/test_cnrfc.py +++ b/collect/tests/test_cnrfc.py @@ -374,7 +374,7 @@ def test__parse_blue_table(self): 61.2 - """)), 'lxml') + """)), 'html.parser') result = cnrfc.cnrfc._parse_blue_table(table_soup) self.assertIsInstance(result[0], pd.DataFrame) self.assertEqual(result[0]['Probability'].tolist(), ['10%', '25%', '50%(Median)']) @@ -501,7 +501,7 @@ def test__get_cnrfc_restricted_content(self): result = cnrfc.cnrfc._get_cnrfc_restricted_content( 'https://www.cnrfc.noaa.gov/restricted/graphicalRVF_tabular.php?id=FOLC1' ) - sample = BeautifulSoup(result, 'lxml').find('pre').text.splitlines()[:9] + sample = BeautifulSoup(result, 'html.parser').find('pre').text.splitlines()[:9] self.assertEqual(sample[2], '# Location: American River - Folsom Lake (FOLC1)') self.assertTrue(sample[-1].startswith('# Maximum Observed Flow:')) diff --git a/collect/tests/test_dwr.py b/collect/tests/test_dwr.py index ffa9997..035a07e 100644 --- a/collect/tests/test_dwr.py +++ b/collect/tests/test_dwr.py @@ -314,7 +314,7 @@ def test__parse_station_generic_table(self): CA Dept of Water Resources/DFM-Hydro-SMN - """), 'lxml') + """), 'html.parser') result = cdec.queries._parse_station_generic_table(table) self.assertEqual(result, {'Station ID': 'CFW', 'Elevation': '260 ft', @@ -348,7 +348,7 @@ def test__parse_station_sensors_table(self): 01/01/2021 to 01/01/2023 - """), 'lxml') + """), 'html.parser') result = cdec.queries._parse_station_sensors_table(table) self.assertEqual(result, {'20': {'event': {'description': 'FLOW, RIVER DISCHARGE, CFS', 'sensor': '20', @@ -368,7 +368,7 @@ def test__parse_station_comments_table(self): Example comment about datum info. 
- """), 'lxml') + """), 'html.parser') result = cdec.queries._parse_station_comments_table(table) self.assertEqual(result, {'02/28/2023': 'Example comment about data availability.', '04/27/2020': 'Example comment about datum info.'}) diff --git a/collect/usgs/usgs.py b/collect/usgs/usgs.py index 45a9697..4a2f21f 100644 --- a/collect/usgs/usgs.py +++ b/collect/usgs/usgs.py @@ -147,7 +147,7 @@ def leap_filter(x): frame.index = pd.to_datetime(frame['peak_dt'].apply(leap_filter)) # load USGS site information - result = BeautifulSoup(requests.get(url.rstrip('rdb')).content, 'lxml') + result = BeautifulSoup(requests.get(url.rstrip('rdb')).content, 'html.parser') info = {'site number': station_id, 'site name': result.find('h2').text} meta = result.findAll('div', {'class': 'leftsidetext'})[0] for div in meta.findChildren('div', {'align': 'left'}): diff --git a/pyproject.toml b/pyproject.toml index 1992f6b..1eb6df4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,7 @@ readme = "README.md" requires-python = ">= 3.11" dependencies = [ "numpy==2.1.3", - "beautifulsoup4==4.12.2", - "lxml==5.3.0", + "beautifulsoup4==4.12.3", "pandas==2.2.3", "pyOpenSSL==23.3.0", "python-dateutil==2.9.0", diff --git a/setup.py b/setup.py index d64deb8..4cdc992 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,7 @@ 'numpy==2.1.3' ], install_requires=[ - 'beautifulsoup4==4.12.2', - 'lxml==5.3.0', + 'beautifulsoup4==4.12.3', 'pandas==2.2.3', 'pyOpenSSL==23.3.0', 'python-dateutil==2.9.0', From de9cd1e093f4ef6cfafe29e2f1f1a3589585c024 Mon Sep 17 00:00:00 2001 From: Carly Narlesky Date: Wed, 4 Dec 2024 09:04:49 -0800 Subject: [PATCH 36/36] Bugfix for B120 table parsing to exclude non-data rows --- collect/dwr/b120.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/collect/dwr/b120.py b/collect/dwr/b120.py index 1cf39db..646f04a 100644 --- a/collect/dwr/b120.py +++ b/collect/dwr/b120.py @@ -62,8 +62,11 @@ def get_b120_data(date_suffix=''): # read HTML table with Water-Year Forecast Summary wy_list = [] - for tr in table.find('tbody').find_all('tr'): - wy_list.append([clean_td(td.text) for td in tr.find_all('td')]) + for tr in table.find('tbody').find_all('tr'): + clean_row = [clean_td(td.text) for td in tr.find_all('td')] + if clean_row[0] == 'Download in comma-delimited format': + continue + wy_list.append(clean_row) # header info headers = table.find('thead').find('tr', {'class': 'header-row2'}).find_all('th') @@ -135,14 +138,14 @@ def clean_td(text): def get_b120_update_data(date_suffix=''): """ Args: - date_suffix (str): + date_suffix (str): optional Returns: """ - # main B120 page (new DWR format) - url = 'https://cdec.water.ca.gov/b120up.html'#.format(date_suffix) + # main B120 page (format circa 2020) + url = 'https://cdec.water.ca.gov/b120up.html' if not validate_date_suffix(date_suffix, min_year=2018): raise errors.B120SourceError('B120 updates in this format not available before Feb. 
2018.') @@ -159,14 +162,24 @@ def get_b120_update_data(date_suffix=''): # read HTML table with April-July Forecast Updates (TAF) aj_list = [] for table in tables: - for tr in table.find('tbody').find_all('tr'): + watershed = None + average = None + + for tr in table.find('tbody').find_all('tr'): cells = tr.find_all('td') - if len(cells) == 1: + + clean_row = [clean_td(td.text) for td in cells] + if clean_row[0] == 'Download in comma-delimited format': + continue + + if cells[0]['class'][0] == 'col-basin-name': spans = cells[0].find_all('span') watershed = spans[0].text.strip() average = clean_td(spans[1].text.strip().split('= ')[-1]) - else: - aj_list.append([watershed, average] + [clean_td(td.text) for td in cells]) + continue + + row_formatted = [watershed, average] + clean_row + aj_list.append(row_formatted) # dataframe storing Apr-Jul forecast table columns = ['Hydrologic Region', 'Average', 'Percentile']
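To summarize the control flow this final bugfix lands on: each `<tr>` is now classified before use; download-link rows are skipped outright, basin-name header rows (recognized by their `col-basin-name` cell class) only update the running watershed/average context, and everything else is appended as data. A condensed re-statement of that logic against hypothetical markup; the basin name and values below are made up for illustration:

```python
from bs4 import BeautifulSoup

# hypothetical fragment mirroring the three row types the parser must separate
html = '''<table><tbody>
  <tr><td class="col-basin-name"><span>Feather River</span><span>Avg = 1823</span></td></tr>
  <tr><td>50%</td><td>1900</td></tr>
  <tr><td>Download in comma-delimited format</td></tr>
</tbody></table>'''

rows = []
watershed, average = None, None
for tr in BeautifulSoup(html, 'html.parser').find('tbody').find_all('tr'):
    cells = tr.find_all('td')
    text = [td.text.strip() for td in cells]
    if text[0] == 'Download in comma-delimited format':
        continue  # footer link row carries no data
    if cells[0].get('class', [''])[0] == 'col-basin-name':
        spans = cells[0].find_all('span')
        watershed = spans[0].text.strip()
        average = spans[1].text.strip().split('= ')[-1]
        continue  # header row only sets context for the data rows below it
    rows.append([watershed, average] + text)

print(rows)  # [['Feather River', '1823', '50%', '1900']]
```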