Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed the dependencies of pandas #1

Merged
merged 5 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,5 +158,5 @@ cython_debug/
# generated
example/results/*

# previous
UI/UI_old.py
# test
testIO.py
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
JSON generator from Excel files with template.

### Dependencies
- pandas
- tkinter
- openpyxl

## Usage
Use the template to build the links between Excel columns and JSON entries.
Expand All @@ -15,8 +14,5 @@ formats of the entries in Excel dataset
1. string: ```"example"```
2. int & float: ```15```, ```1.0```
3. range: ```[-1,100]```
4. list: ```l[121,abc,def]```, ```l["anc,"hele",""test"]```
- string elements only
- begin with ```l[``` as the notation
5. dict:
- no dictionary as entries, instead using column names to locate the position directly.
4. list: ```[here, is, example]```, ```["here", "is", "example"]```
5. dict: **no** dictionaries as entries; instead, use column names to locate the position directly.
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ name: jsonGen
channels:
- defaults
dependencies:
- numpy==1.24.3
- pandas==2.0.3
- openpyxl==3.0.10
5 changes: 4 additions & 1 deletion example/test1.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
"field" : "field",
"subfield" : "subfield",
"cost" : "cost",
"lasting" : "lasting",
"lasting" : {
"last" : "lasting"
},
"interval": "range",
"consequence" : "conseq",
"disclosureProb" : "discl",
"address": {
Expand Down
Binary file modified example/test1.xlsx
Binary file not shown.
31 changes: 16 additions & 15 deletions example/test2.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
{
"identifier" : "ide",
"name" : "name",
"address": {
"city": "city",
"zipcode": "code"
},
"range": "range",
"list": "list",
"complex":[{
"A":"A",
"B":"B"
"identifier" : "ide",
"name" : "name",
"address": {
"city": "city",
"zipcode": "code"
},
{
"C":"C",
"D":"D"
}
"descr": "descr",
"range": "range",
"list": "list",
"complex":[{
"A":"A",
"B":"B"
},
{
"C":"C",
"D":"D"
}
]
}
Binary file modified example/test2.xlsx
Binary file not shown.
6 changes: 5 additions & 1 deletion utils/__utils_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
##############################################
### HERE TO TEST FUNCTIONS IN utils BLOCKS ###
##############################################

# Using pytest assertions
def add(a, b):
    """Return the sum of *a* and *b* (toy helper exercised by the pytest demo)."""
    total = a + b
    return total

def test_addition():
    """Sanity-check add() with a single pytest-style assertion.

    The scraped source carried the assertion twice (diff removed+added
    lines both captured); one copy is the correct content.
    """
    result = add(1, 2)
    assert result == 3, f"Expected 3, but got {result}"
14 changes: 5 additions & 9 deletions utils/core.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
import json
import copy

import pandas as pd
# import jsonpath_ng
# from objectpath import Tree

import utils.fileIO as IO
import utils.process as PS

class JSONGenerator:
def __init__(self):
self.name = ""
self.template: dict = None
self.dataset: pd.DataFrame = None
self.dataset: list[dict] = None
self.data_size: int = -1
self.data_columns: pd.Index = None
self.data_columns: list = None
self.links: dict = None
self.option_list = dict()
self.previews = []
Expand All @@ -38,16 +34,16 @@ def generate_json(self, g_range: tuple[int, int] = None):
g_range = (0, self.data_size)

self.links = PS.parse_links(self.template, self.data_columns)
self.dataset = PS.process(self.dataset, self.option_list)
self.dataset = PS.process_options(self.dataset, self.option_list)

for i in range(*g_range):
data = self.dataset.iloc[i]
data = self.dataset[i]
raw = copy.deepcopy(self.template)

for link_name, link in self.links.items():
# code = f"{link} = \'{data[link_name]}\'"
print(link)
raw = IO.update_json(raw, link, data[link_name])
raw = IO.update_json_dict(raw, link, data[link_name])

self.previews[i] = raw

Expand Down
45 changes: 33 additions & 12 deletions utils/fileIO.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pandas as pd
import openpyxl as oxl
import os
import json
import numpy as np

def read_json(json_file_path: str):
# Reading JSON file content into a string
Expand All @@ -18,26 +17,48 @@ def read_json(json_file_path: str):


def read_excel(excel_file_path: str, sheet_name: str = 'Sheet1'):
    """Load one worksheet of an .xlsx file into a list of row dicts.

    Parameters:
        excel_file_path (str): Path to the workbook to open.
        sheet_name (str): Worksheet to read; defaults to 'Sheet1' to stay
            backward compatible with the previous hard-coded name.

    Returns:
        tuple: (dataset, columns, size) where dataset is a list of
        {column_name: cell_value} dicts (one per data row), columns is the
        list of non-None header names from row 1, and size is len(dataset).
    """
    wb = oxl.load_workbook(excel_file_path)
    sheet = wb[sheet_name]  # raises KeyError if the sheet is absent

    rows = [list(row) for row in sheet.iter_rows(values_only=True)]
    if not rows:
        # Empty worksheet: no header row, so no columns and no data.
        return [], [], 0

    header, data_rows = rows[0], rows[1:]
    # Columns whose header cell is None are skipped entirely, matching the
    # previous behavior of ignoring unnamed columns.
    loaded_columns = [name for name in header if name is not None]
    loaded_dataset = [
        {name: row[col] for col, name in enumerate(header) if name is not None}
        for row in data_rows
    ]

    return loaded_dataset, loaded_columns, len(loaded_dataset)

def update_json_dict(target_dict: dict, path: list[str], value: object):
    """Set the value at *path* inside the nested dictionary *target_dict*.

    Parameters:
        target_dict (dict): The (nested) dictionary to update. It is
            modified in place and also returned.
        path (list[str]): Keys leading to the target position, outermost
            first. An empty list means "replace this whole node". None
            means the column had no matching template entry, so nothing
            is changed.
        value: The new value to store at the target position.

    Returns:
        dict: *target_dict* with the update applied (the value itself
        when path is empty).

    Raises:
        KeyError: If an intermediate key along *path* does not exist.
    """
    if path is None:
        # No link was found for this column -- leave the template untouched.
        return target_dict

    if not path:
        # Reached the target position: the caller stores this return value.
        return value

    target_dict[path[0]] = update_json_dict(target_dict[path[0]], path[1:], value)

    return target_dict


def write_json(json_obj: dict, json_output_path: str):
Expand Down
105 changes: 63 additions & 42 deletions utils/process.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pandas as pd
import os
import copy


class Options:
Expand All @@ -9,8 +9,69 @@ def __init__(self):
self.number_to_string = False
self.string_to_number = False
# self.expression: str = None # e.g. 'str(x)+".png"'

def __process_list(list_string: str):
    """Parse a bracketed list literal from an Excel cell into a Python list.

    "[1, 2.5, abc]" -> [1, 2.5, "abc"]; nested "[...]" items recurse.
    Non-list strings are returned unchanged; "[]" yields [].

    Fixes over the previous version: "[]" no longer parses to [''], items
    are whitespace-stripped (so "[a, b]" yields "b", not " b", and nested
    lists after a space are still detected), and the split honors bracket
    depth so nested lists are not torn apart at their inner commas.
    """
    if not (list_string.startswith("[") and list_string.endswith("]")):
        return list_string

    inner = list_string[1:-1].strip()
    if not inner:
        # "[]" is the empty list, not a list holding one empty string.
        return []

    # Split on top-level commas only, so nested "[...]" items stay intact.
    items = []
    depth = 0
    start = 0
    for i, ch in enumerate(inner):
        if ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
        elif ch == ',' and depth == 0:
            items.append(inner[start:i])
            start = i + 1
    items.append(inner[start:])

    parsed = []
    for item in items:
        item = item.strip()
        if item.startswith('['):
            parsed.append(__process_list(item))
            continue
        # Convert numeric items to int or float; otherwise keep the string
        # with any surrounding quotes removed.
        try:
            parsed.append(int(item))
        except ValueError:
            try:
                parsed.append(float(item))
            except ValueError:
                parsed.append(item.strip('\'"'))

    return parsed


def __process_column(dataset: list[dict], column: str, opt: Options) -> list[dict]:
    """Return a copy of *dataset* with *column* parsed and its Options applied.

    Parameters:
        dataset (list[dict]): Rows as {column_name: value} dicts; not mutated.
        column (str): The column to process (raises KeyError if a row lacks it).
        opt (Options): Per-column conversion flags.

    Returns:
        list[dict]: A deep copy of *dataset* with the column transformed.

    The scraped source contained a stray removed-diff line
    (`def parse_links(...)`) in the middle of this function; it is dropped here.
    """
    processed_dataset = copy.deepcopy(dataset)

    for row in processed_dataset:
        value = row[column]

        # Bracketed strings are parsed into Python lists first.
        if isinstance(value, str):
            value = __process_list(value)

        # Option flags are applied in a fixed order; later flags see the
        # result of earlier ones (e.g. remove_spaces stringifies first).
        if opt.remove_spaces:
            value = str(value).strip()

        if opt.remove_ext_name:
            # Drop a file extension, e.g. 'img.png' -> 'img'.
            value = os.path.splitext(value)[0]

        if opt.string_to_number:
            value = float(value)

        if opt.number_to_string:
            value = str(value)

        # if opt.expression is not None:
        #     value = eval(opt.expression, {'x': value})

        row[column] = value

    return processed_dataset


def process_options(dataset: list[dict], opt_list: dict[str, Options]) -> list[dict]:
    """Apply each column's Options to a deep copy of *dataset*.

    Parameters:
        dataset (list[dict]): Rows as {column_name: value} dicts; not mutated.
        opt_list (dict[str, Options]): Mapping of column name to its Options.

    Returns:
        list[dict]: A new dataset with every listed column processed.
    """
    result = copy.deepcopy(dataset)

    for col_name, col_options in opt_list.items():
        result = __process_column(result, col_name, col_options)

    return result


def parse_links(template, columns: list):
def find_path(json_obj, target_value, current_path=[]):
"""
Recursively find the first path with the given value in a JSON structure.
Expand Down Expand Up @@ -44,43 +105,3 @@ def find_path(json_obj, target_value, current_path=[]):
links[column] = find_path(template, column)

return links

def process_value(value: any):
    """Parse a string cell: '[a,b]' with exactly two items becomes a float
    range, 'l[...]' becomes a list of strings; anything else passes through.
    (Legacy pandas-era helper, removed by this PR.)
    """
    if type(value) is str:
        if value.startswith('[') and value.endswith(']'): # e.g. [0,100]
            # Strip brackets and spaces, then split on commas; only a
            # two-element result is treated as a numeric range.
            value_r = value.replace(' ', '').replace(']', '').replace('[', '').split(",")
            if len(value_r) == 2:
                value = [float(v) for v in value_r]

        elif value.startswith('l[') and value.endswith(']'): # e.g. l["123", "asda", "12313"]
            # 'l[' prefix marks a string list; quotes and spaces are dropped.
            value = value.replace(' ', '').replace(']', '').replace('l[', '').replace('\"', '').split(",")

    return value


def process_column(datacol: pd.DataFrame, opt: Options) -> pd.DataFrame:
    """Apply per-cell parsing and the column's Options flags to a pandas
    column, returning the transformed column.
    (Legacy pandas-era helper, removed by this PR.)
    """
    datacol = datacol.apply(process_value)

    if opt.remove_spaces:
        # NOTE(review): astype(str) stringifies non-string cells too before
        # stripping -- confirm that was intended for numeric columns.
        datacol = datacol.astype(str).str.strip()

    if opt.remove_ext_name:
        # Drop file extensions, e.g. 'img.png' -> 'img'.
        datacol = datacol.apply(lambda x: os.path.splitext(x)[0])

    if opt.string_to_number:
        datacol = datacol.astype(float)

    if opt.number_to_string:
        datacol = datacol.astype(str)

    # if opt.expression is not None:
    #     datacol = datacol.apply(lambda x: eval(opt.expression, {'x': x}))

    return datacol


def process(dataset: pd.DataFrame, opt_list: dict[str, Options]) -> pd.DataFrame:
    """Apply each column's Options to *dataset* in place and return it.
    (Legacy pandas-era entry point, removed by this PR.)
    """
    for column, option in opt_list.items():
        dataset[column] = process_column(dataset[column], option)

    return dataset
Loading