trying to fix dataload issue
kaiguender committed Nov 11, 2022
1 parent 02de0d9 commit 24a0efd
Showing 20 changed files with 424 additions and 41 deletions.
6 changes: 3 additions & 3 deletions .ipynb_checkpoints/settings-checkpoint.ini
@@ -5,7 +5,7 @@
### Python library ###
repo = dddex
lib_name = %(repo)s
-version = 0.0.3
+version = 0.0.4
min_python = 3.7
license = apache2

@@ -30,13 +30,13 @@ audience = Developers
author = kaiguender
author_email = kai.guender@yahoo.de
copyright = 2022 onwards, %(author)s
-description = To be added.
+description = The package "data-driven density estimation x" (dddex) turns any standard point forecasting model into an estimator of the underlying conditional density
keywords = nbdev jupyter notebook python
language = English
status = 3
user = kaiguender

### Optional ###
-requirements = fastcore>=1.5.27 pandas>=1.3.0 sklearn tsfresh>=0.19.0 lightgbm>=3.3.2 tabulate>=0.8.10 ipdb
+requirements = fastcore>=1.5.27 pandas>=1.3.0 sklearn tsfresh>=0.19.0 lightgbm>=3.3.2 tabulate>=0.8.10
# dev_requirements =
# console_scripts =
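
The space-separated `requirements` value above is what the generated setup.py (shown below) splits into the `install_requires` list. A minimal sketch of that parsing step, using an inline config string for illustration:

``` python
from configparser import ConfigParser

# Sketch: read a settings.ini-style config and split the requirements field,
# mirroring cfg.get('requirements','').split() in the setup.py below.
iniText = """[DEFAULT]
requirements = fastcore>=1.5.27 pandas>=1.3.0 sklearn tsfresh>=0.19.0 lightgbm>=3.3.2 tabulate>=0.8.10
"""

config = ConfigParser(delimiters = ['='])
config.read_string(iniText)
cfg = config['DEFAULT']

installRequires = cfg.get('requirements', '').split()
print(installRequires)
# ['fastcore>=1.5.27', 'pandas>=1.3.0', 'sklearn', 'tsfresh>=0.19.0',
#  'lightgbm>=3.3.2', 'tabulate>=0.8.10']
```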
58 changes: 58 additions & 0 deletions .ipynb_checkpoints/setup-checkpoint.py
@@ -0,0 +1,58 @@
from pkg_resources import parse_version
from configparser import ConfigParser
import setuptools
assert parse_version(setuptools.__version__)>=parse_version('36.2')

# note: all settings are in settings.ini; edit there, not here
config = ConfigParser(delimiters=['='])
config.read('settings.ini')
cfg = config['DEFAULT']

cfg_keys = 'version description keywords author author_email'.split()
expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
setup_cfg = {o:cfg[o] for o in cfg_keys}

licenses = {
'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
'mit': ('MIT License', 'OSI Approved :: MIT License'),
'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
}
statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
'4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
py_versions = '3.6 3.7 3.8 3.9 3.10'.split()

requirements = cfg.get('requirements','').split()
if cfg.get('pip_requirements'): requirements += cfg.get('pip_requirements','').split()
min_python = cfg['min_python']
lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
dev_requirements = (cfg.get('dev_requirements') or '').split()

setuptools.setup(
name = cfg['lib_name'],
license = lic[0],
classifiers = [
'Development Status :: ' + statuses[int(cfg['status'])],
'Intended Audience :: ' + cfg['audience'].title(),
'Natural Language :: ' + cfg['language'].title(),
] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
url = cfg['git_url'],
packages = setuptools.find_packages(),
include_package_data = True,
package_data={'': ['datasets/*.csv']},
install_requires = requirements,
extras_require={ 'dev': dev_requirements },
dependency_links = cfg.get('dep_links','').split(),
python_requires = '>=' + cfg['min_python'],
long_description = open('README.md').read(),
long_description_content_type = 'text/markdown',
zip_safe = False,
entry_points = {
'console_scripts': cfg.get('console_scripts','').split(),
'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d']
},
**setup_cfg)
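
A quick worked example of the classifier slicing in the `setup()` call above, assuming `min_python = '3.7'` as in the settings.ini shown earlier:

``` python
py_versions = '3.6 3.7 3.8 3.9 3.10'.split()
min_python = '3.7'

# Everything from min_python upwards becomes a Python version classifier.
supported = py_versions[py_versions.index(min_python):]
classifiers = ['Programming Language :: Python :: ' + v for v in supported]
print(classifiers)
# ['Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8',
#  'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10']
```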


84 changes: 84 additions & 0 deletions README.md
@@ -68,6 +68,90 @@ dataYaz, XTrain, yTrain, XTest, yTest = loadDataYaz(testDays = 28, returnXY = Tr
LGBM = LGBMRegressor(n_jobs = 1)
```

``` python
loadDataYaz??
```

Signature: loadDataYaz(testDays=28, returnXY=True, daysToCut=0)
Docstring: <no docstring>
Source:
def loadDataYaz(testDays = 28, returnXY = True, daysToCut = 0):

    currentFile = __file__
    scriptPath = os.path.realpath(currentFile) # /home/user/test/my_script.py
    dirPath = os.path.dirname(scriptPath)      # /home/user/test
    dataDirPath = join(dirPath, 'datasets')
    dataPath = join(dataDirPath, 'dataYaz.csv')

    data = pd.read_csv(dataPath)

    # Cut away daysToCut-many days at the end of the data: useful for
    # evaluating on the data in a rolling manner
    cutOffDate = data.dayIndex.max() - daysToCut
    data = data[data['dayIndex'] <= cutOffDate].reset_index(drop = True)

    # Label
    if isinstance(testDays, int):
        nDaysTest = testDays
    else:
        tsSizes = data.groupby(['id']).size()
        nDaysTest = int(tsSizes.iloc[0] * testDays)

    cutoffDateTest = data.dayIndex.max() - nDaysTest
    data['label'] = ['train' if data.dayIndex.iloc[i] <= cutoffDateTest else 'test' for i in range(data.shape[0])]

    # Normalize demand
    scalingData = data[data.label == 'train'].groupby('id')['demand'].agg('max').reset_index()
    scalingData.rename(columns = {'demand': 'scalingValue'}, inplace = True)
    data = pd.merge(data, scalingData, on = 'id')

    data['demand'] = data.demand / data.scalingValue

    #---

    # Add lag features
    y = pd.DataFrame(data['demand'])
    X = data.drop(columns = ['demand'])

    # set lag features
    fc_parameters = MinimalFCParameters()

    # delete length features
    del fc_parameters['length']

    # create lag features
    X, y = add_lag_features(X = X,
                            y = y,
                            column_id = ['id'],
                            column_sort = 'dayIndex',
                            feature_dict = fc_parameters,
                            time_windows = [(7, 7), (14, 14), (28, 28)])

    data = pd.concat([y, X], axis = 1)

    # Turn y from a Series or DataFrame into a flattened array
    y = np.ravel(y)

    #---

    X = np.array(data.drop(['demand', 'label', 'id'], axis = 1))

    XTrain = X[data['label'] == 'train']
    yTrain = y[data['label'] == 'train']

    XTest = X[data['label'] == 'test']
    yTest = y[data['label'] == 'test']

    #---

    if returnXY:
        return data, XTrain, yTrain, XTest, yTest
    else:
        return data
File: ~/dddex/dddex/loadData.py
Type: function
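
The `daysToCut` argument enables the rolling evaluation mentioned in the comments above: each cut drops the most recent days and shifts the train/test split backwards. A minimal sketch of such a loop (the cut values and the R² scoring are illustrative, not taken from the package docs):

``` python
from dddex.loadData import loadDataYaz
from lightgbm import LGBMRegressor

# Illustrative rolling evaluation: cut 0, 28 and 56 days off the end of the
# data in turn, refit the point forecaster and score it on the held-out days.
for daysToCut in [0, 28, 56]:
    dataYaz, XTrain, yTrain, XTest, yTest = loadDataYaz(testDays = 28,
                                                        returnXY = True,
                                                        daysToCut = daysToCut)
    LGBM = LGBMRegressor(n_jobs = 1)
    LGBM.fit(XTrain, yTrain)
    print(daysToCut, LGBM.score(XTest, yTest))
```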

There are three parameters for
[`LevelSetKDEx`](https://kaiguender.github.io/dddex/levelsetkdex.html#levelsetkdex):

8 changes: 4 additions & 4 deletions _proc/04_loadData.ipynb
@@ -49,7 +49,7 @@
"text/markdown": [
"---\n",
"\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L22){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L24){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### loadDataYaz\n",
"\n",
@@ -58,7 +58,7 @@
"text/plain": [
"---\n",
"\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L22){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L24){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### loadDataYaz\n",
"\n",
@@ -102,7 +102,7 @@
"text/markdown": [
"---\n",
"\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L99){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L102){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### add_lag_features\n",
"\n",
@@ -136,7 +136,7 @@
"text/plain": [
"---\n",
"\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L99){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"[source](https://github.com/kaiguender/dddex/blob/main/dddex/loadData.py#L102){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
"\n",
"### add_lag_features\n",
"\n",
