Skip to content

Commit

Permalink
Merge pull request #1 from XDzzzzzZyq/No-Pandas
Browse files Browse the repository at this point in the history
Removed the dependencies of pandas
  • Loading branch information
XDzzzzzZyq authored Feb 2, 2024
2 parents 58d83b2 + 560ffa6 commit fce24d7
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 91 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,5 +158,5 @@ cython_debug/
# generated
example/results/*

# previous
UI/UI_old.py
# test
testIO.py
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
JSON generator from Excel files with template.

### Dependencies
- pandas
- tkinter
- openpyxl

## Usage
Use a template to define the links between Excel columns and JSON entries.
Expand All @@ -15,8 +14,5 @@ formats of the entries in Excel dataset
1. string: ```"example"```
2. int & float: ```15```, ```1.0```
3. range: ```[-1,100]```
4. list: ```l[121,abc,def]```, ```l["anc","hele","test"]```
- string elements only
- begin with ```l[``` as the notation
5. dict:
- no dictionary as entries, instead using column names to locate the position directly.
4. list: ```[here, is, example]```, ```["here", "is", "example"]```
5. dict: dictionaries are **NOT** supported as entries; instead, use column names to locate the position in the template directly.
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ name: jsonGen
channels:
- defaults
dependencies:
- numpy==1.24.3
- pandas==2.0.3
- openpyxl==3.0.10
5 changes: 4 additions & 1 deletion example/test1.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
"field" : "field",
"subfield" : "subfield",
"cost" : "cost",
"lasting" : "lasting",
"lasting" : {
"last" : "lasting"
},
"interval": "range",
"consequence" : "conseq",
"disclosureProb" : "discl",
"address": {
Expand Down
Binary file modified example/test1.xlsx
Binary file not shown.
31 changes: 16 additions & 15 deletions example/test2.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
{
"identifier" : "ide",
"name" : "name",
"address": {
"city": "city",
"zipcode": "code"
},
"range": "range",
"list": "list",
"complex":[{
"A":"A",
"B":"B"
"identifier" : "ide",
"name" : "name",
"address": {
"city": "city",
"zipcode": "code"
},
{
"C":"C",
"D":"D"
}
"descr": "descr",
"range": "range",
"list": "list",
"complex":[{
"A":"A",
"B":"B"
},
{
"C":"C",
"D":"D"
}
]
}
Binary file modified example/test2.xlsx
Binary file not shown.
6 changes: 5 additions & 1 deletion utils/__utils_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
##############################################
### HERE TO TEST FUNCTIONS IN utils BLOCKS ###
##############################################

# Using pytest assertions
def add(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total

def test_addition():
    """Check that ``add(1, 2)`` evaluates to 3."""
    result = add(1, 2)
    # One assertion is sufficient; the message reports the value actually returned.
    assert result == 3, f"Expected 3, but got {result}"
14 changes: 5 additions & 9 deletions utils/core.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
import json
import copy

import pandas as pd
# import jsonpath_ng
# from objectpath import Tree

import utils.fileIO as IO
import utils.process as PS

class JSONGenerator:
def __init__(self):
self.name = ""
self.template: dict = None
self.dataset: pd.DataFrame = None
self.dataset: list[dict] = None
self.data_size: int = -1
self.data_columns: pd.Index = None
self.data_columns: list = None
self.links: dict = None
self.option_list = dict()
self.previews = []
Expand All @@ -38,16 +34,16 @@ def generate_json(self, g_range: tuple[int, int] = None):
g_range = (0, self.data_size)

self.links = PS.parse_links(self.template, self.data_columns)
self.dataset = PS.process(self.dataset, self.option_list)
self.dataset = PS.process_options(self.dataset, self.option_list)

for i in range(*g_range):
data = self.dataset.iloc[i]
data = self.dataset[i]
raw = copy.deepcopy(self.template)

for link_name, link in self.links.items():
# code = f"{link} = \'{data[link_name]}\'"
print(link)
raw = IO.update_json(raw, link, data[link_name])
raw = IO.update_json_dict(raw, link, data[link_name])

self.previews[i] = raw

Expand Down
45 changes: 33 additions & 12 deletions utils/fileIO.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pandas as pd
import openpyxl as oxl
import os
import json
import numpy as np

def read_json(json_file_path: str):
# Reading JSON file content into a string
Expand All @@ -18,26 +17,48 @@ def read_json(json_file_path: str):


def read_excel(excel_file_path: str):
    """
    Load a worksheet of an Excel workbook as a list of row dictionaries.

    The first row of the sheet is treated as the header. Columns whose header
    cell is empty are skipped. Every following row becomes one dictionary that
    maps each header name to the cell value of that row.

    Parameters:
    excel_file_path (str): Path of the workbook to read.

    Returns:
    tuple: ``(dataset, columns, size)`` where ``dataset`` is a list with one
    dictionary per data row, ``columns`` is the list of header names in sheet
    order and ``size`` is ``len(dataset)``.
    """
    wb = oxl.load_workbook(excel_file_path)
    try:
        # Prefer the conventional 'Sheet1', but fall back to the active sheet so
        # that workbooks with a renamed sheet do not fail with a KeyError.
        sheet = wb['Sheet1'] if 'Sheet1' in wb.sheetnames else wb.active
        loaded_excel = [
            list(row)
            for row in sheet.iter_rows(min_row=1, max_row=sheet.max_row,
                                       min_col=1, max_col=sheet.max_column,
                                       values_only=True)
        ]
    finally:
        # Release the underlying file handle even if reading fails.
        wb.close()

    # Nothing to read at all: report an empty dataset instead of indexing row 0.
    if not loaded_excel:
        return [], [], 0

    header, *records = loaded_excel

    # Keep only the columns that actually have a header name.
    named_columns = [(index, name) for index, name in enumerate(header) if name is not None]

    loaded_columns = [name for _, name in named_columns]
    loaded_dataset = [
        {name: record[index] for index, name in named_columns}
        for record in records
    ]

    return loaded_dataset, loaded_columns, len(loaded_dataset)

def update_json_dict(target_dict: dict, path: list[str], value: object):
    """
    Update a specific value in a nested dictionary.

    Parameters:
    target_dict (dict): The dictionary to be updated. It is modified in place.
    path (list[str]): The path to the value to be updated. Each element in the list
        is used to index one nesting level, outermost first. ``None`` means there is
        nothing to update.
    value (any): The new value to be set.

    Returns:
    dict: The updated dictionary. When ``path`` is empty, ``value`` itself is returned,
    which is how the recursion replaces the innermost entry.

    Raises:
    KeyError: If an element of ``path`` is not present at its nesting level.
    """
    if path is None:
        return target_dict

    if not path:  # the last position
        return value

    head, rest = path[0], path[1:]
    target_dict[head] = update_json_dict(target_dict[head], rest, value)

    return target_dict


def write_json(json_obj: dict, json_output_path: str):
Expand Down
105 changes: 63 additions & 42 deletions utils/process.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pandas as pd
import os
import copy


class Options:
Expand All @@ -9,8 +9,69 @@ def __init__(self):
self.number_to_string = False
self.string_to_number = False
# self.expression: str = None # e.g. 'str(x)+".png"'

def __split_list_items(body: str) -> list:
    """Split the text between the outer brackets on its top-level commas.

    Commas inside nested ``[...]`` or inside a quoted item do not split.
    A quote only counts as opening when it is the first non-blank character
    of an item, so apostrophes inside plain words are left alone.
    """
    items = []
    current = []
    depth = 0     # nesting level of '[' ... ']' within the current item
    quote = None  # the quote character that is currently open, if any

    for char in body:
        if quote is not None:
            if char == quote:
                quote = None
        elif char in '\'"' and not ''.join(current).strip():
            quote = char
        elif char == '[':
            depth += 1
        elif char == ']':
            depth = max(depth - 1, 0)
        elif char == ',' and depth == 0:
            items.append(''.join(current))
            current = []
            continue
        current.append(char)

    if quote is not None:
        # Unterminated quote: fall back to a plain comma split rather than
        # swallowing the rest of the list into a single item.
        return body.split(',')

    items.append(''.join(current))
    return items


def __process_list(list_string: str):
    """Convert a string such as ``"[1, abc, [2, 3]]"`` into a Python list.

    Parameters:
    list_string (str): The cell text to interpret.

    Returns:
    The parsed list when ``list_string`` starts with ``[`` and ends with ``]``;
    otherwise ``list_string`` unchanged. Items are stripped of surrounding
    whitespace, converted to ``int`` or ``float`` when possible, and otherwise
    kept as strings with surrounding quotes removed. Bracketed items are parsed
    recursively into nested lists.
    """
    ## check if the string is a list
    if not (list_string.startswith("[") and list_string.endswith("]")):
        return list_string

    ## preprocess the string: drop exactly one pair of outer brackets
    body = list_string[1:-1]
    if not body.strip():
        return []

    items = []
    for raw_item in __split_list_items(body):
        item = raw_item.strip()

        if item.startswith('[') and item.endswith(']'):
            items.append(__process_list(item))
            continue

        # convert numeric items to int or float
        try:
            items.append(int(item))
        except ValueError:
            try:
                items.append(float(item))
            except ValueError:
                items.append(item.strip('\'"'))

    return items


def __process_cell(value, opt: Options):
    """Apply list parsing and the enabled options of ``opt`` to one cell value."""
    ## process list
    if isinstance(value, str):
        value = __process_list(value)

    ## process options
    if opt.remove_spaces:
        value = str(value).strip()

    if opt.remove_ext_name:
        value = os.path.splitext(value)[0]

    if opt.string_to_number:
        value = float(value)

    if opt.number_to_string:
        value = str(value)

    return value


def __process_column(dataset: list[dict], column: str, opt: Options) -> list[dict]:
    """Return a deep copy of ``dataset`` with ``column`` processed in every row.

    Parameters:
    dataset (list[dict]): The rows to process; the input is not modified.
    column (str): The key of the entry to process in each row.
    opt (Options): The options to apply to that entry.

    Returns:
    list[dict]: The processed copy of the dataset.
    """
    processed_dataset = copy.deepcopy(dataset)

    for record in processed_dataset:
        record[column] = __process_cell(record[column], opt)

    return processed_dataset


def process_options(dataset: list[dict], opt_list: dict[str, Options]) -> list[dict]:
    """Return a deep copy of ``dataset`` with every configured column processed.

    Parameters:
    dataset (list[dict]): The rows to process; the input is not modified.
    opt_list (dict[str, Options]): Maps a column name to the options for it.

    Returns:
    list[dict]: The processed copy of the dataset.
    """
    # Copy once up front; every column is then processed on that single copy
    # instead of re-copying the whole dataset per column.
    processed_dataset = copy.deepcopy(dataset)

    for column, options in opt_list.items():
        for record in processed_dataset:
            record[column] = __process_cell(record[column], options)

    return processed_dataset


def parse_links(template, columns: list):
def find_path(json_obj, target_value, current_path=[]):
"""
Recursively find the first path with the given value in a JSON structure.
Expand Down Expand Up @@ -44,43 +105,3 @@ def find_path(json_obj, target_value, current_path=[]):
links[column] = find_path(template, column)

return links

def process_value(value: any):
    """Interpret the bracket notations used in dataset cells.

    Parameters:
    value (any): The raw cell value.

    Returns:
    - a list of two floats for a range string such as ``"[0,100]"``;
    - a list of strings for a list string such as ``'l["123", "asda"]'``;
    - ``value`` unchanged in every other case, including bracket strings that
      do not hold exactly two numeric items.
    """
    if not isinstance(value, str) or not value.endswith(']'):
        return value

    if value.startswith('['):  # e.g. [0,100]
        parts = value.replace(' ', '').replace(']', '').replace('[', '').split(",")
        if len(parts) != 2:
            return value
        try:
            return [float(part) for part in parts]
        except ValueError:
            # Not a numeric range (e.g. "[a,b]"): keep the text as it is
            # instead of aborting the whole conversion.
            return value

    if value.startswith('l['):  # e.g. l["123", "asda", "12313"]
        return value.replace(' ', '').replace(']', '').replace('l[', '').replace('\"', '').split(",")

    return value


def process_column(datacol: pd.DataFrame, opt: Options) -> pd.DataFrame:
    """Run ``process_value`` over a column, then apply the enabled options.

    The options are applied in a fixed order: ``remove_spaces``,
    ``remove_ext_name``, ``string_to_number``, ``number_to_string``.

    NOTE(review): annotated as DataFrame, but the ``.str`` accessor used below
    suggests a single column (Series) is expected — confirm against the caller.
    """
    def drop_extension(cell):
        # Keep everything before the file extension.
        root, _ext = os.path.splitext(cell)
        return root

    converted = datacol.apply(process_value)

    if opt.remove_spaces:
        as_text = converted.astype(str)
        converted = as_text.str.strip()

    if opt.remove_ext_name:
        converted = converted.apply(drop_extension)

    if opt.string_to_number:
        converted = converted.astype(float)

    if opt.number_to_string:
        converted = converted.astype(str)

    # if opt.expression is not None:
    # datacol = datacol.apply(lambda x: eval(opt.expression, {'x': x}))

    return converted


def process(dataset: pd.DataFrame, opt_list: dict[str, Options]) -> pd.DataFrame:
    """Process each column named in ``opt_list`` with its options.

    The columns of ``dataset`` are reassigned in place and the same object is
    returned.
    """
    for column in opt_list:
        column_options = opt_list[column]
        dataset[column] = process_column(dataset[column], column_options)

    return dataset

0 comments on commit fce24d7

Please sign in to comment.