-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathacquisitionMapper.py
108 lines (94 loc) · 4.16 KB
/
acquisitionMapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import json
from imageMapper import readFile, formatMetadata, extractImageMappings, extractImageData, headerMapping, writeMetadataToJson
def extract_metadata_addresses(json_file):
    """Split the acquisition schema map into EMProject and Images addresses.

    Loads ``json_file``, reads its top-level ``"acquisition_main_schema"``
    object and sorts each entry by the prefix of its value.

    Args:
        json_file: Path to the JSON schema-map file.

    Returns:
        A tuple ``(EM_metadata, Image_metadata)`` of dictionaries. The first
        holds the entries whose value starts with ``"EMProject"``, the second
        those whose value starts with ``"Images"``. Both are empty when the
        ``"acquisition_main_schema"`` key is absent.
    """
    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Extract the "acquisition" information
    acquisition_data = data.get('acquisition_main_schema', {})

    EM_metadata = {}
    Image_metadata = {}
    for key, value in acquisition_data.items():
        # Only string values can be addresses. Anything else (nested objects,
        # numbers, null) is skipped instead of crashing on .startswith().
        # NOTE(review): nested objects are skipped, not flattened - confirm
        # whether the schema map is expected to carry addresses inside them.
        if not isinstance(value, str):
            continue
        if value.startswith('EMProject'):
            EM_metadata[key] = value
        elif value.startswith('Images'):
            Image_metadata[key] = value
    return EM_metadata, Image_metadata
import xml.etree.ElementTree as ET
def xml_to_dict(file_path):
    """Parse an XML file into nested dictionaries keyed by tag name.

    Namespace prefixes (the ``{uri}`` part ElementTree puts in front of a tag)
    are removed from every tag. An element without children is represented by
    its ``text`` (``None`` when it has none); an element with children becomes
    a dictionary of its children. When a tag occurs more than once under the
    same parent, its values are collected into a list in document order.
    Element attributes are not read.

    Args:
        file_path: Path of the XML file to parse.

    Returns:
        A single-key dictionary ``{root_tag: parsed_root}``.
    """

    def strip_namespace(tag):
        # ElementTree spells namespaced tags as "{uri}local"; keep "local".
        if '}' in tag:
            return tag.split('}', 1)[1]
        return tag

    def parse_element(element):
        # Leaf element: its text is the value.
        if len(element) == 0:
            return element.text

        result = {}
        for child in element:
            child_tag = strip_namespace(child.tag)
            child_data = parse_element(child)
            if child_tag not in result:
                result[child_tag] = child_data
            elif isinstance(result[child_tag], list):
                # Third or later occurrence: the list already exists.
                result[child_tag].append(child_data)
            else:
                # Second occurrence: promote the single value to a list.
                result[child_tag] = [result[child_tag], child_data]
        return result

    root = ET.parse(file_path).getroot()
    return {strip_namespace(root.tag): parse_element(root)}
def traverse_dict(data, path):
    """Look up a dot-separated path in nested dictionaries and lists.

    Each path segment is used as a key into the current mapping. When the
    current node is a list or tuple (as produced for repeated XML tags), a
    segment made only of decimal digits is used as a zero-based index.

    Args:
        data: The nested structure to walk.
        path: Dot-separated segments, e.g. ``"a.b.0.c"``.

    Returns:
        The value found at ``path``, or ``None`` when any segment cannot be
        resolved (missing key, index out of range, non-numeric segment on a
        list, or a node that cannot be subscripted).
    """
    current = data
    for key in path.split('.'):
        try:
            if isinstance(current, (list, tuple)):
                # Only non-negative decimal segments address a sequence.
                if not key.isdecimal():
                    return None
                current = current[int(key)]
            else:
                current = current[key]
        except (KeyError, IndexError, TypeError):
            return None
    return current
def _resolve_address(address, data, dataset_num):
    """Follow one dot-separated address through ``data`` and return the value found."""
    current = data
    for level in address.split('.'):
        current = current[level]
        # emxml contains multiple instances of "Dataset", so the parsed value
        # is a list and we have to pick the dataset we actually want.
        # Per the original author's note: 1 is SEM Image, 2 is SEM Image 2 and
        # 3 is the one we're not interested in.
        # TODO: check the selected dataset against the image folder name.
        if level == 'Dataset' and isinstance(current, list):
            # dataset_num is 1-based. Reject 0 and negatives explicitly so
            # they cannot wrap around to the end of the list.
            if not 1 <= dataset_num <= len(current):
                print(f"There is no dataset at index {dataset_num}.")
                return None
            current = current[dataset_num - 1]
    return current


def extract_values(addresses, data, dataset_num = 1):
    """Resolve every address in ``addresses`` against the nested ``data``.

    Args:
        addresses: Mapping of output key to dot-separated address.
        data: Nested dictionary to read from (e.g. a parsed emxml document).
        dataset_num: 1-based number of the entry to use whenever a
            ``"Dataset"`` level holds a list. Ignored when ``"Dataset"`` holds
            a single value.

    Returns:
        A dictionary with the same keys as ``addresses``, each mapped to the
        value found at its address. A key is mapped to ``None`` (and a message
        is printed) when ``dataset_num`` does not name an existing dataset,
        instead of continuing the lookup from the wrong node.

    Raises:
        KeyError: If an address names a level that is missing from ``data``.
    """
    result = {}
    for key, address in addresses.items():
        result[key] = _resolve_address(address, data, dataset_num)
    return result
if __name__ == "__main__":
    # Command-line entry point: map the EM project XML and one image header
    # onto the acquisition schema and write the merged result as JSON.
    # Every option defaults to the value that was previously hard-coded, so
    # running the script without arguments behaves as before.
    import argparse

    parser = argparse.ArgumentParser(
        description='Map EM project and image metadata onto the acquisition schema.'
    )
    parser.add_argument(
        '--emxml',
        default='/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/EMproject.emxml',
        help='Path to the EMproject.emxml file.',
    )
    parser.add_argument(
        '--schema-map',
        default='/Users/elias/Desktop/PP13_Mapping/pp13-mapper/schemas/sem_fib_nested_schema_map.json',
        help='Path to the JSON schema map.',
    )
    parser.add_argument(
        '--image',
        default='/Users/elias/Desktop/NFDI Tomographiedaten/20200818_AlSi13 XRM tomo2/Images/SEM Image/SEM Image - SliceImage - 001.tif',
        help='Path to the image file whose header metadata is read.',
    )
    parser.add_argument(
        '--output',
        default='/Users/elias/Desktop/PP13_Mapping/pp13-mapper/results/acquisitionMetadata.json',
        help='Path of the JSON file to write.',
    )
    parser.add_argument(
        '--dataset-num',
        type=int,
        default=1,
        help='1-based number of the emxml dataset to use (1 or 2).',
    )
    args = parser.parse_args()

    # Define relevant files
    emxml_file = args.emxml
    json_file = args.schema_map
    imgFile = args.image
    output_path = args.output
    dataset_num = args.dataset_num  # 1 or 2

    # Addresses from the schema map, split by source (emxml vs. image header).
    emMetadata, imgMetadata = extract_metadata_addresses(json_file)

    # EM project side: parse the XML and resolve the EMProject addresses.
    emData = xml_to_dict(emxml_file)
    mappedEMMetadata = extract_values(emMetadata, emData, dataset_num)

    # Image side: handled by the helpers imported from imageMapper.
    image_data = readFile(imgFile)
    formatted_metadata = formatMetadata(image_data)
    image_metadata = extractImageData(formatted_metadata, imgMetadata)
    mappedImgMetadata = headerMapping(image_metadata, imgMetadata)

    # Merge the two metadata dictionaries; on a key clash the image value wins.
    acquisitionMetadata = {**mappedEMMetadata, **mappedImgMetadata}
    writeMetadataToJson(acquisitionMetadata, output_path)
    print(f'Successfully processed. Results are in {output_path}')