Commit 3f889d7
Run pre-commit run --all-files
This cleans up the project by running everything through the pre-commit pipeline.
lukecampbell committed Oct 19, 2022
1 parent 73f4544 commit 3f889d7
Showing 13 changed files with 220 additions and 131 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -33,6 +33,7 @@ repos:
   hooks:
   - id: isort
     additional_dependencies: [toml]
+    exclude: ^(docs|setup.py)
     args: [--project=gcm_filters, --multi-line=3, --lines-after-imports=2, --lines-between-types=1, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=88]
 
 - repo: https://github.com/asottile/seed-isort-config
@@ -44,6 +45,7 @@ repos:
   rev: stable
   hooks:
   - id: black
+    exclude: ^(docs|setup.py)
     language_version: python3
 
 # - repo: https://github.com/pre-commit/mirrors-mypy
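As an aside, the isort flags above pin down a specific wrap style. A hypothetical long import formatted under --multi-line=3 (vertical hanging indent) with --use-parentheses, --trailing-comma, and --line-width=88 would come out like this (illustrative only, not part of the commit):

    # Illustrative only; the wrap shape is what the flags enforce.
    from intake_erddap.erddap import (
        ERDDAPSource,
        ERDDAPSourceAutoPartition,
        ERDDAPSourceManualPartition,
    )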
Binary file removed docs/.Makefile.swp
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -1,3 +1,3 @@
-numpydoc
 sphinx
 sphinx_rtd_theme
+numpydoc
20 changes: 10 additions & 10 deletions intake_erddap/__init__.py
@@ -1,14 +1,14 @@
-from pkg_resources import DistributionNotFound, get_distribution
-
 import intake
-from .erddap import (ERDDAPSource, ERDDAPSourceAutoPartition,
-                     ERDDAPSourceManualPartition)
+
+from .erddap import ERDDAPSource, ERDDAPSourceAutoPartition, ERDDAPSourceManualPartition
 from .erddap_cat import ERDDAPCatalog
 from .version import __version__
 
-from pkg_resources import DistributionNotFound, get_distribution
-try:
-    __version__ = get_distribution("model_catalogs").version
-except DistributionNotFound:
-    # package is not installed
-    __version__ = "unknown"
+
+__all__ = [
+    "ERDDAPSource",
+    "ERDDAPSourceAutoPartition",
+    "ERDDAPSourceManualPartition",
+    "ERDDAPCatalog",
+    "__version__",
+]
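With __all__ pinning down the public API, a minimal usage sketch looks like the following; the server URL and dataset id are hypothetical placeholders, and read() is assumed to be the standard intake entry point:

    from intake_erddap import ERDDAPSource

    # Hypothetical ERDDAP server and dataset id.
    source = ERDDAPSource(
        server="https://erddap.example.com/erddap",
        dataset_id="example_dataset",
    )
    df = source.read()  # assumed intake read() -> pandas DataFrame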
142 changes: 86 additions & 56 deletions intake_erddap/erddap.py
@@ -1,6 +1,7 @@
-from intake.source import base
-from . import __version__
 from erddapy import ERDDAP
+from intake.source import base
 
+from .version import __version__
+
 
 class ERDDAPSource(base.DataSource):
@@ -21,21 +22,28 @@ class ERDDAPSource(base.DataSource):
     constraints: dict
     """
-    name = 'erddap'
+
+    name = "erddap"
     version = __version__
-    container = 'dataframe'
+    container = "dataframe"
     partition_access = True
 
-    def __init__(self, server, dataset_id,
-                 protocol='tabledap',
-                 variables=[], constraints={}, metadata={}):
+    def __init__(
+        self,
+        server,
+        dataset_id,
+        protocol="tabledap",
+        variables=[],
+        constraints={},
+        metadata={},
+    ):
         self._init_args = {
-            'server': server,
-            'dataset_id': dataset_id,
-            'protocol': protocol,
-            'variables': variables,
-            'constraints': constraints,
-            'metadata': metadata,
+            "server": server,
+            "dataset_id": dataset_id,
+            "protocol": protocol,
+            "variables": variables,
+            "constraints": constraints,
+            "metadata": metadata,
         }
 
         self._server = server
@@ -56,18 +64,19 @@ def _load(self):
         e.constraints = self._constraints
 
         self._dataframe = e.to_pandas()
-
 
     def _get_schema(self):
         if self._dataframe is None:
             # TODO: could do partial read with chunksize to get likely schema from
             # first few records, rather than loading the whole thing
             self._load()
-        return base.Schema(datashape=None,
-                           dtype=self._dataframe.dtypes,
-                           shape=self._dataframe.shape,
-                           npartitions=1,
-                           extra_metadata={})
+        return base.Schema(
+            datashape=None,
+            dtype=self._dataframe.dtypes,
+            shape=self._dataframe.shape,
+            npartitions=1,
+            extra_metadata={},
+        )
 
     def _get_partition(self, _):
         if self._dataframe is None:
@@ -102,18 +111,19 @@ class ERDDAPSourceAutoPartition(base.DataSource):
     sql_kwargs: dict
         Further arguments to pass to dask.dataframe.read_sql
     """
-    name = 'erddap_auto'
+
+    name = "erddap_auto"
     version = __version__
-    container = 'dataframe'
+    container = "dataframe"
     partition_access = True
 
     def __init__(self, uri, table, index, sql_kwargs={}, metadata={}):
         self._init_args = {
-            'uri': uri,
-            'sql_expr': table,
-            'index': index,
-            'sql_kwargs': sql_kwargs,
-            'metadata': metadata,
+            "uri": uri,
+            "sql_expr": table,
+            "index": index,
+            "sql_kwargs": sql_kwargs,
+            "metadata": metadata,
         }
 
         self._uri = uri
@@ -126,17 +136,21 @@ def __init__(self, uri, table, index, sql_kwargs={}, metadata={}):
 
     def _load(self):
         import dask.dataframe as dd
-        self._dataframe = dd.read_sql_table(self._sql_expr, self._uri,
-                                            self._index, **self._sql_kwargs)
+
+        self._dataframe = dd.read_sql_table(
+            self._sql_expr, self._uri, self._index, **self._sql_kwargs
+        )
 
     def _get_schema(self):
         if self._dataframe is None:
             self._load()
-        return base.Schema(datashape=None,
-                           dtype=self._dataframe,
-                           shape=(None, len(self._dataframe.columns)),
-                           npartitions=self._dataframe.npartitions,
-                           extra_metadata={})
+        return base.Schema(
+            datashape=None,
+            dtype=self._dataframe,
+            shape=(None, len(self._dataframe.columns)),
+            npartitions=self._dataframe.npartitions,
+            extra_metadata={},
+        )
 
     def _get_partition(self, i):
         if self._dataframe is None:
@@ -146,7 +160,7 @@ def _get_partition(self, i):
     def to_dask(self):
         self._get_schema()
         return self._dataframe
-
+
     def read(self):
         self._get_schema()
         return self._dataframe.compute()
@@ -187,47 +201,61 @@ class ERDDAPSourceManualPartition(base.DataSource):
     sql_kwargs: dict
         Further arguments to pass to pd.read_sql_query
     """
-    name = 'erddap_manual'
+
+    name = "erddap_manual"
     version = __version__
-    container = 'dataframe'
+    container = "dataframe"
     partition_access = True
 
-    def __init__(self, uri, sql_expr, where_values, where_template=None,
-                 sql_kwargs={}, metadata={}):
+    def __init__(
+        self,
+        uri,
+        sql_expr,
+        where_values,
+        where_template=None,
+        sql_kwargs={},
+        metadata={},
+    ):
         self._init_args = {
-            'uri': uri,
-            'sql_expr': sql_expr,
-            'where': where_values,
-            'where_tmp': where_template,
-            'sql_kwargs': sql_kwargs,
-            'metadata': metadata,
+            "uri": uri,
+            "sql_expr": sql_expr,
+            "where": where_values,
+            "where_tmp": where_template,
+            "sql_kwargs": sql_kwargs,
+            "metadata": metadata,
         }
 
         self._uri = uri
         self._sql_expr = sql_expr  # TODO: may check for table and expand to
-                                   # "SELECT * FROM {table}"
+        # "SELECT * FROM {table}"
         self._sql_kwargs = sql_kwargs
         self._where = where_values
         self._where_tmp = where_template
         self._dataframe = None
-        self._meta = self._sql_kwargs.pop('meta', None)
+        self._meta = self._sql_kwargs.pop("meta", None)
 
         super(SQLSourceManualPartition, self).__init__(metadata=metadata)
 
     def _load(self):
-        self._dataframe = read_sql_query(self._uri, self._sql_expr,
-                                         self._where, where_tmp=self._where_tmp,
-                                         meta=self._meta,
-                                         kwargs=self._sql_kwargs)
+        self._dataframe = read_sql_query(
+            self._uri,
+            self._sql_expr,
+            self._where,
+            where_tmp=self._where_tmp,
+            meta=self._meta,
+            kwargs=self._sql_kwargs,
+        )
 
     def _get_schema(self):
         if self._dataframe is None:
             self._load()
-        return base.Schema(datashape=None,
-                           dtype=self._dataframe,
-                           shape=(None, len(self._dataframe.columns)),
-                           npartitions=self._dataframe.npartitions,
-                           extra_metadata={})
+        return base.Schema(
+            datashape=None,
+            dtype=self._dataframe,
+            shape=(None, len(self._dataframe.columns)),
+            npartitions=self._dataframe.npartitions,
+            extra_metadata={},
+        )
 
     def _get_partition(self, i):
         if self._dataframe is None:
Expand All @@ -248,7 +276,8 @@ def _close(self):

def load_part(sql, engine, where, kwargs, meta=None):
import pandas as pd
sql = sql + ' ' + where

sql = sql + " " + where
df = pd.read_sql(sql, engine, **kwargs)
if meta is not None:
if df.empty:
@@ -286,6 +315,7 @@ def read_sql_query(uri, sql, where, where_tmp=None, meta=None, kwargs=None):
     """
     import dask
     import dask.dataframe as dd
+
     if where_tmp is not None:
         where = [where_tmp.format(values) for values in where]
     if kwargs is None:
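For reference, ERDDAPSource._load above boils down to a handful of erddapy calls. This is a rough sketch, and the server URL, dataset id, variables, and constraints are made-up examples:

    from erddapy import ERDDAP

    e = ERDDAP(server="https://erddap.example.com/erddap", protocol="tabledap")
    e.dataset_id = "example_dataset"
    e.variables = ["time", "temperature"]
    e.constraints = {"time>=": "2021-01-01T00:00:00Z"}
    df = e.to_pandas()  # fetch the filtered table as a pandas DataFrame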
