Skip to content
This repository was archived by the owner on Mar 7, 2025. It is now read-only.

Commit efb8098

Browse files
authored
Merge pull request #27 from Cloud-PG/RL-v5
Rl v5
2 parents fc8dcc8 + 5159e84 commit efb8098

File tree

225 files changed

+6192
-502
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

225 files changed

+6192
-502
lines changed

.vscode/tasks.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"tasks": [{
66
"label": "build simulator",
77
"type": "shell",
8-
"command": "pipenv run python -m utils compile --fast 'true'",
8+
"command": "pipenv run python -m utils compile --fast",
99
"problemMatcher": [],
1010
"group": {
1111
"kind": "build",

Pipfile

+4
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,7 @@ verify_ssl = true
66
[dev-packages]
77
pylama = "*"
88
black = "*"
9+
ipython = "*"
10+
11+
[pipenv]
12+
allow_prereleases = true

Pipfile.lock

+116-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Probe/probe/loaders.py

+38-46
Original file line numberDiff line numberDiff line change
@@ -3,44 +3,45 @@
33
from os import path
44

55
import numpy as np
6+
67
# import modin.pandas as pd
78
import pandas as pd
89
from tqdm import tqdm
910

1011
from .utils import STATUS_ARROW, STATUS_WARNING
1112

12-
__all__ = ['csv_data']
13+
__all__ = ["csv_data"]
1314

1415

15-
def _load_csv_file(input_path: str, region_filter: str = None,
16-
file_type_filter: str = None) -> 'pd.DataFrame':
16+
def _load_csv_file(
17+
input_path: str, region_filter: str = None, file_type_filter: str = None
18+
) -> "pd.DataFrame":
1719
"""Load a csv data file.
1820
1921
:raises Exception: File type not supported
2022
:raises Exception: Compressed file type not supported
2123
:return: The data content
2224
:rtype: pandas.DataFrame
2325
"""
24-
print(
25-
f"{STATUS_ARROW}Open file: {STATUS_WARNING(input_path)}\x1b[0K",
26-
end="\r"
27-
)
26+
print(f"{STATUS_ARROW}Open file: {STATUS_WARNING(input_path)}\x1b[0K", end="\r")
2827
head, tail = path.splitext(input_path)
29-
if tail in ['.gz', 'gzip']:
28+
if tail in [".gz", "gzip"]:
3029
head, tail = path.splitext(head)
3130
if tail == ".csv":
3231
with gzip.GzipFile(input_path, "rb") as data_file:
3332
df = pd.read_csv(data_file, index_col=False)
34-
df['day'] = pd.to_datetime(df.reqDay, unit="s")
33+
df["day"] = pd.to_datetime(df.reqDay, unit="s")
3534
df.reset_index(drop=True, inplace=True)
3635
else:
3736
raise Exception(
38-
f"Input {input_path} with file type '{tail}' is not supported...")
39-
elif tail == '.csv':
37+
f"Input {input_path} with file type '{tail}' is not supported..."
38+
)
39+
elif tail == ".csv":
4040
df = pd.read_csv(input_path, index_col=False)
4141
else:
4242
raise Exception(
43-
f"Input {input_path} with file type '{tail}' is not supported...")
43+
f"Input {input_path} with file type '{tail}' is not supported..."
44+
)
4445

4546
if region_filter and region_filter != "all":
4647
if df.SiteName.dtype != np.int64:
@@ -65,49 +66,45 @@ def _get_month(filename: str) -> int:
6566
:return: the number of the month found in the filename
6667
:rtype: int
6768
"""
68-
prefix = "results_numeric" if filename.find(
69-
"results_numeric") != -1 else "results_"
69+
prefix = "results_numeric" if filename.find("results_numeric") != -1 else "results_"
7070
return int(filename.split(".")[0].replace(prefix, "").split("-")[1])
7171

7272

73-
def gen_csv_data(input_path: str, region_filter: str = None,
74-
file_type_filter: str = None,
75-
month_filter: int = -1) -> 'pd.DataFrame':
73+
def gen_csv_data(
74+
input_path: str,
75+
region_filter: str = None,
76+
file_type_filter: str = None,
77+
month_filter: int = -1,
78+
) -> "pd.DataFrame":
7679
"""Generate the dataframe of source data (folder or a file)
7780
78-
:yield: first the total amount of files and then
81+
:yield: first the total amount of files and then
7982
a tuple with filepath and DataFrame
8083
:rtype: generator
8184
"""
8285
if path.isdir(input_path):
83-
files = [
84-
file_ for file_ in os.listdir(
85-
input_path) if file_.find("csv") != -1
86-
]
86+
files = [file_ for file_ in os.listdir(input_path) if file_.find("csv") != -1]
8787
yield len(files)
8888
for filename in sorted(files):
8989
if month_filter != -1:
9090
if _get_month(filename) != month_filter:
9191
continue
9292
filepath = path.join(input_path, filename)
93-
df = _load_csv_file(
94-
filepath,
95-
region_filter,
96-
file_type_filter
97-
)
93+
df = _load_csv_file(filepath, region_filter, file_type_filter)
9894
yield filepath, df
9995
else:
10096
yield 1
101-
yield input_path, _load_csv_file(
102-
input_path, region_filter, file_type_filter
103-
)
97+
yield input_path, _load_csv_file(input_path, region_filter, file_type_filter)
10498

10599

106-
def csv_data(input_path: str, region_filter: str = None,
107-
file_type_filter: str = None,
108-
month_filter: int = -1,
109-
concat: bool = True,
110-
generate: bool = False) -> 'pd.DataFrame':
100+
def csv_data(
101+
input_path: str,
102+
region_filter: str = None,
103+
file_type_filter: str = None,
104+
month_filter: int = -1,
105+
concat: bool = True,
106+
generate: bool = False,
107+
) -> "pd.DataFrame":
111108
"""Open csv data folder and files
112109
113110
:return: The whole dataset
@@ -116,25 +113,20 @@ def csv_data(input_path: str, region_filter: str = None,
116113
assert concat != generate, "You cannot concat and generate data..."
117114
if path.isdir(input_path):
118115
data_frames = []
119-
files = [
120-
file_ for file_ in os.listdir(
121-
input_path) if file_.find("csv") != -1
122-
]
123-
for filename in tqdm(sorted(files), desc=f"{STATUS_ARROW}Load folder {input_path}"):
116+
files = [file_ for file_ in os.listdir(input_path) if file_.find("csv") != -1]
117+
for filename in tqdm(
118+
sorted(files), desc=f"{STATUS_ARROW}Load folder {input_path}"
119+
):
124120
if month_filter != -1:
125121
if _get_month(filename) != month_filter:
126122
continue
127123
cur_dataframe = _load_csv_file(
128-
path.join(input_path, filename),
129-
region_filter,
130-
file_type_filter
124+
path.join(input_path, filename), region_filter, file_type_filter
131125
)
132126
if generate:
133127
yield cur_dataframe
134128
else:
135-
data_frames.append(
136-
cur_dataframe
137-
)
129+
data_frames.append(cur_dataframe)
138130
else:
139131
if data_frames:
140132
if concat:

0 commit comments

Comments
 (0)