diff --git a/.gitignore b/.gitignore index 1e280e6..590ede8 100644 --- a/.gitignore +++ b/.gitignore @@ -158,5 +158,5 @@ cython_debug/ # generated example/results/* -# previous -UI/UI_old.py \ No newline at end of file +# test +testIO.py \ No newline at end of file diff --git a/README.md b/README.md index 90969ad..eb9eef0 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ JSON generator from Excel files with template. ### Dependencies -- pandas -- tkinter +- openpyxl ## Usage using template to build up the links between Excel and JSON entries @@ -15,8 +14,5 @@ formats of the entries in Excel dataset 1. string: ```"example"``` 2. int & float: ```15```, ```1.0``` 3. range: ```[-1,100]``` -4. list: ```l[121,abc,def]```, ```l["anc,"hele",""test"]``` - - string elements only - - begin with ```l[``` as the notation -5. dict: - - no dictionary as entries, instead using column names to locate the position directly. \ No newline at end of file +4. list: ```[here, is, example]```, ```["here", "is", "example"]``` +5. dict: - **NO** dictionary as entries, instead using column names to locate the position directly. \ No newline at end of file diff --git a/environment.yml b/environment.yml index a6db831..2004870 100644 --- a/environment.yml +++ b/environment.yml @@ -2,5 +2,4 @@ name: jsonGen channels: - defaults dependencies: - - numpy==1.24.3 - - pandas==2.0.3 + - openpyxl==3.0.10 diff --git a/example/test1.json b/example/test1.json index cb0d727..5bfaa20 100644 --- a/example/test1.json +++ b/example/test1.json @@ -5,7 +5,10 @@ "field" : "field", "subfield" : "subfield", "cost" : "cost", - "lasting" : "lasting", + "lasting" : { + "last" : "lasting" + }, + "interval": "range", "consequence" : "conseq", "disclosureProb" : "discl", "address": { diff --git a/example/test1.xlsx b/example/test1.xlsx index 0331c9c..26a1481 100644 Binary files a/example/test1.xlsx and b/example/test1.xlsx differ diff --git a/example/test2.json b/example/test2.json index 082453c..19299f1 100644 --- a/example/test2.json +++ b/example/test2.json @@ -1,19 +1,20 @@ { - "identifier" : "ide", - "name" : "name", - "address": { - "city": "city", - "zipcode": "code" - }, - "range": "range", - "list": "list", - "complex":[{ - "A":"A", - "B":"B" + "identifier" : "ide", + "name" : "name", + "address": { + "city": "city", + "zipcode": "code" }, - { - "C":"C", - "D":"D" - } + "descr": "descr", + "range": "range", + "list": "list", + "complex":[{ + "A":"A", + "B":"B" + }, + { + "C":"C", + "D":"D" + } ] } diff --git a/example/test2.xlsx b/example/test2.xlsx index 3f0500e..79952a2 100644 Binary files a/example/test2.xlsx and b/example/test2.xlsx differ diff --git a/utils/__utils_test.py b/utils/__utils_test.py index f742d39..698c7b4 100644 --- a/utils/__utils_test.py +++ b/utils/__utils_test.py @@ -1,7 +1,11 @@ +############################################## +### HERE TO TEST FUNCTIONS IN utils BLOCKS ### +############################################## + # Using pytest assertions def add(a,b): return a+b def test_addition(): result = add(1, 2) - assert result == 3, f"Expected 3, but got {result}" \ No newline at end of file + assert result == 3, f"Expected 3, but got {result}" diff --git a/utils/core.py b/utils/core.py index 37394ee..9d20eda 100644 --- a/utils/core.py +++ b/utils/core.py @@ -1,10 +1,6 @@ import json import copy -import pandas as pd -# import jsonpath_ng -# from objectpath import Tree - import utils.fileIO as IO import utils.process as PS @@ -12,9 +8,9 @@ class JSONGenerator: def __init__(self): self.name = "" self.template: dict = None - self.dataset: pd.DataFrame = None + self.dataset: list[dict] = None self.data_size: int = -1 - self.data_columns: pd.Index = None + self.data_columns: list = None self.links: dict = None self.option_list = dict() self.previews = [] @@ -38,16 +34,16 @@ def generate_json(self, g_range: tuple[int, int] = None): g_range = (0, self.data_size) self.links = PS.parse_links(self.template, self.data_columns) - self.dataset = PS.process(self.dataset, self.option_list) + self.dataset = PS.process_options(self.dataset, self.option_list) for i in range(*g_range): - data = self.dataset.iloc[i] + data = self.dataset[i] raw = copy.deepcopy(self.template) for link_name, link in self.links.items(): # code = f"{link} = \'{data[link_name]}\'" print(link) - raw = IO.update_json(raw, link, data[link_name]) + raw = IO.update_json_dict(raw, link, data[link_name]) self.previews[i] = raw diff --git a/utils/fileIO.py b/utils/fileIO.py index ff49ed2..6a9fa2b 100644 --- a/utils/fileIO.py +++ b/utils/fileIO.py @@ -1,7 +1,6 @@ -import pandas as pd +import openpyxl as oxl import os import json -import numpy as np def read_json(json_file_path: str): # Reading JSON file content into a string @@ -18,26 +17,48 @@ def read_json(json_file_path: str): def read_excel(excel_file_path: str): - loaded_excel = pd.read_excel(excel_file_path).dropna(axis=1, how='all') + wb = oxl.load_workbook(excel_file_path) + sheet = wb['Sheet1'] + loaded_excel = [] + for row in sheet.iter_rows(min_row=1, max_row=sheet.max_row, min_col=1, max_col=sheet.max_column, values_only=True): + loaded_excel.append(list(row)) + + loaded_columns = [] + loaded_dataset = [{} for _ in range(len(loaded_excel) - 1)] + + for col in range(len(loaded_excel[0])): + if loaded_excel[0][col] is None: + continue + + loaded_columns.append(loaded_excel[0][col]) + for row in range(len(loaded_excel) - 1): + loaded_dataset[row][loaded_excel[0][col]] = loaded_excel[row + 1][col] + + return loaded_dataset, loaded_columns, len(loaded_dataset) - return loaded_excel, loaded_excel.columns, len(loaded_excel) +def update_json_dict(target_dict: dict, path: list[str], value: any): + """ + Update a specific value in a nested dictionary. -def update_json(tar, path: list[str], value: any): - """Update JSON dictionnary PATH with VALUE. Return updated JSON""" + Parameters: + target_dict (dict): The dictionary to be updated. + path_to_value (list[str]): The path to the value to be updated. Each element in the list represents a key in the dictionary. + new_value (any): The new value to be set. - if path is None: - return tar + Returns: + dict: The updated dictionary. + """ - if type(value) is np.int64: - value = int(value) + if path is None: + return target_dict if len(path) == 0: # the last position return value - tar[path[0]] = update_json(tar[path[0]], path[1:], value) + target_dict[path[0]] = update_json_dict(target_dict[path[0]], path[1:], value) - return tar + return target_dict def write_json(json_obj: dict, json_output_path: str): diff --git a/utils/process.py b/utils/process.py index 4de0893..79cb185 100644 --- a/utils/process.py +++ b/utils/process.py @@ -1,5 +1,5 @@ -import pandas as pd import os +import copy class Options: @@ -9,8 +9,69 @@ def __init__(self): self.number_to_string = False self.string_to_number = False # self.expression: str = None # e.g. 'str(x)+".png"' + +def __process_list(list_string: str): + ## check if the string is a list + if not list_string.startswith("[") or not list_string.endswith("]") : + return list_string + + ## preprocess the string + items = list_string.strip('[]').split(',') + + # Process each item + for i, item in enumerate(items): + if item.startswith('['): + items[i] = __process_list(item) + else: + # convert numeric items to int or float + try: + items[i] = int(item) + except ValueError: + try: + items[i] = float(item) + except ValueError: + items[i] = item.strip('\'"') + + return items + + +def __process_column(dataset: list[dict], column: str, opt: Options) -> list[dict]: + processed_dataset = copy.deepcopy(dataset) + + for row in range(len(processed_dataset)): + ## process list + if type(processed_dataset[row][column]) is str: + processed_dataset[row][column] = __process_list(processed_dataset[row][column]) -def parse_links(template, columns: pd.Index): + ## process options + if opt.remove_spaces: + processed_dataset[row][column] = str(processed_dataset[row][column]).strip() + + if opt.remove_ext_name: + processed_dataset[row][column] = os.path.splitext(processed_dataset[row][column])[0] + + if opt.string_to_number: + processed_dataset[row][column] = float(processed_dataset[row][column]) + + if opt.number_to_string: + processed_dataset[row][column] = str(processed_dataset[row][column]) + + # if opt.expression is not None: + # processed_dataset[row][column] = eval(processed_dataset[row][column], {'x': x}) + + return processed_dataset + + +def process_options(dataset: list[dict], opt_list: dict[str, Options]) -> list[dict]: + processed_dataset = copy.deepcopy(dataset) + + for column, options in opt_list.items(): + processed_dataset = __process_column(processed_dataset, column, options) + + return processed_dataset + + +def parse_links(template, columns: list): def find_path(json_obj, target_value, current_path=[]): """ Recursively find the first path with the given value in a JSON structure. @@ -44,43 +105,3 @@ def find_path(json_obj, target_value, current_path=[]): links[column] = find_path(template, column) return links - -def process_value(value: any): - if type(value) is str: - if value.startswith('[') and value.endswith(']'): # e.g. [0,100] - value_r = value.replace(' ', '').replace(']', '').replace('[', '').split(",") - if len(value_r) == 2: - value = [float(v) for v in value_r] - - elif value.startswith('l[') and value.endswith(']'): # e.g. l["123", "asda", "12313"] - value = value.replace(' ', '').replace(']', '').replace('l[', '').replace('\"', '').split(",") - - return value - - -def process_column(datacol: pd.DataFrame, opt: Options) -> pd.DataFrame: - datacol = datacol.apply(process_value) - - if opt.remove_spaces: - datacol = datacol.astype(str).str.strip() - - if opt.remove_ext_name: - datacol = datacol.apply(lambda x: os.path.splitext(x)[0]) - - if opt.string_to_number: - datacol = datacol.astype(float) - - if opt.number_to_string: - datacol = datacol.astype(str) - - # if opt.expression is not None: - # datacol = datacol.apply(lambda x: eval(opt.expression, {'x': x})) - - return datacol - - -def process(dataset: pd.DataFrame, opt_list: dict[str, Options]) -> pd.DataFrame: - for column, option in opt_list.items(): - dataset[column] = process_column(dataset[column], option) - - return dataset