From 3eddd2ce6b7dd54f83fd2ec0e8511217cd2b3c46 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Thu, 29 Aug 2024 12:51:25 +0200 Subject: [PATCH] Fixed generic regridding 1d, 2d, fixed H5OINA overview plot, next step fix IPF maps --- .vscode/launch.json | 4 +- src/pynxtools_em/examples/ebsd_database.py | 10 - src/pynxtools_em/methods/ebsd.py | 412 +++++++++++++++--- src/pynxtools_em/parsers/hfive_apex.py | 9 +- src/pynxtools_em/parsers/hfive_bruker.py | 8 +- src/pynxtools_em/parsers/hfive_ebsd.py | 1 - src/pynxtools_em/parsers/hfive_oxford.py | 139 +++--- src/pynxtools_em/reader.py | 14 +- src/pynxtools_em/utils/get_scan_points.py | 134 ------ src/pynxtools_em/utils/get_sqr_grid.py | 183 -------- .../{hfive_web_constants.py => hfive_web.py} | 2 - 11 files changed, 418 insertions(+), 498 deletions(-) delete mode 100644 src/pynxtools_em/utils/get_scan_points.py delete mode 100644 src/pynxtools_em/utils/get_sqr_grid.py rename src/pynxtools_em/utils/{hfive_web_constants.py => hfive_web.py} (97%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0341c9a..659ed62 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -39,6 +39,7 @@ //"../apex/InGaN_nanowires_map.edaxh5", //"../apex/InGaN_nanowires_spectra.edaxh5", //"../apex/2023-08-16_Ni_NFDI.edaxh5", + "tests/data/173_0057.h5oina", "--reader", "em", "--nxdl", @@ -55,7 +56,8 @@ //"--output=dbg/tescan.nxs", //"--output=pynxtools_em/dbg/zeiss.nxs", //"--output=pynxtools_em/dbg/tfs.nxs", - //"--output=dbg/apex.nxs"], + //"--output=dbg/apex.nxs" + "--output=dbg/hfive_oxford.nxs"], } ] } \ No newline at end of file diff --git a/src/pynxtools_em/examples/ebsd_database.py b/src/pynxtools_em/examples/ebsd_database.py index 3636e84..6be6270 100644 --- a/src/pynxtools_em/examples/ebsd_database.py +++ b/src/pynxtools_em/examples/ebsd_database.py @@ -25,16 +25,6 @@ onto the set of NXem/ENTRY/sample/atom_types. """ -# typical scan schemes used for EBSD -HEXAGONAL_FLAT_TOP_TILING = "hexagonal_flat_top_tiling" -SQUARE_TILING = "square_tiling" -REGULAR_TILING = "regular_tiling" - -# most frequently this is the sequence of set scan positions with actual positions -# based on grid type, spacing, and tiling -FLIGHT_PLAN = "start_top_left_stack_x_left_to_right_stack_x_line_along_end_bottom_right" - - FREE_TEXT_TO_CONCEPT = { "Actinolite": "Actinolite", "al": "Al", diff --git a/src/pynxtools_em/methods/ebsd.py b/src/pynxtools_em/methods/ebsd.py index 25d6364..cd71599 100644 --- a/src/pynxtools_em/methods/ebsd.py +++ b/src/pynxtools_em/methods/ebsd.py @@ -19,6 +19,7 @@ import mmap import os +from typing import Any, Dict import matplotlib.pyplot as plt # in the hope that this closes figures with orix plot import numpy as np @@ -27,16 +28,14 @@ from orix.quaternion.symmetry import get_point_group from orix.vector import Vector3d from PIL import Image as pil -from pynxtools_em.utils.get_scan_points import hexagonal_grid, square_grid, threed -from pynxtools_em.utils.get_sqr_grid import ( - regrid_onto_equisized_scan_points, -) -from pynxtools_em.utils.hfive_web_constants import ( +from pynxtools_em.utils.hfive_web import ( HFIVE_WEB_MAXIMUM_RGB, HFIVE_WEB_MAXIMUM_ROI, ) from pynxtools_em.utils.hfive_web_utils import hfive_web_decorate_nxdata from pynxtools_em.utils.image_processing import thumbnail +from pynxtools_em.utils.pint_custom_unit_registry import ureg +from scipy.spatial import KDTree PROJECTION_VECTORS = [Vector3d.xvector(), Vector3d.yvector(), Vector3d.zvector()] PROJECTION_DIRECTIONS = [ @@ -45,6 +44,37 @@ ("Z", Vector3d.zvector().data.flatten()), ] +# typical scan schemes used for EBSD +HEXAGONAL_FLAT_TOP_TILING = "hexagonal_flat_top_tiling" +SQUARE_TILING = "square_tiling" +REGULAR_TILING = "regular_tiling" +DEFAULT_LENGTH_UNIT = ureg.micrometer + +# most frequently this is the sequence of set scan positions with actual positions +# based on grid type, spacing, and tiling +FLIGHT_PLAN = "start_top_left_stack_x_left_to_right_stack_x_line_along_end_bottom_right" + + +class EbsdPointCloud: + """Cache for storing a single indexed EBSD point cloud with mark data.""" + + def __init__(self): + self.dimensionality: int = 0 + self.grid_type = None + # the next two lines encode the typical assumption that is not reported in tech partner file! + self.n: Dict[str, Any] = {} # number of grid points along "x", "y", "z" + self.s: Dict[str, Any] = {} # scan step along "x", "y", "z" + self.phase = [] # collection of phase class instances in order of self.phases + self.space_group = [] # collection of space group in order of self.phases + self.phases = {} # named phases + self.euler = None # Bunge-Euler ZXZ angle for each scan point np.nan otherwise + self.phase_id = None # phase_id for best solution found for each scan point + self.pos: Dict[ + str, Any + ] = {} # "x", "y", "z" pos for each scan point unmodified/not rediscretized + self.descr_type = None # NXem_ebsd/roi/descriptor (band contrast, CI, MAD) + self.descr_value = None + def get_ipfdir_legend(ipf_key): """Generate IPF color map key for a specific ipf_key.""" @@ -98,7 +128,174 @@ def get_named_axis(inp: dict, dim: str) -> np.ndarray: return None -def ebsd_roi_overview(inp: dict, id_mgn: dict, template: dict) -> dict: +def regrid_onto_equisized_scan_points( + src_grid: EbsdPointCloud, max_edge_discr: int +) -> dict: + """Discretize point cloud in R^d (d=1, 2, 3) and mark data to grid with equisized bins.""" + + if src_grid.dimensionality not in [1, 2]: + print( + f"The 1D and 3D gridding is currently not implemented because we do not " + f"have a large enough dataset to test the 3D case with !" + ) + return src_grid + # take discretization of the source grid as a guide for the target_grid + # optimization possible if square grid and matching maximum_extent + + dims = ["x", "y", "z"][0 : src_grid.dimensionality] + tuples = [] + for dim_idx, dim in enumerate(dims): + if dim in src_grid.n: + tuples.append((src_grid.n[dim], dim_idx)) + max_extent, max_dim = sorted(tuples, key=lambda x: x[0])[::-1][0] # descendingly + + # too large grid needs to be capped when gridded + # cap to the maximum extent to comply with H5Web technical constraints + if max_extent >= max_edge_discr: + max_extent = max_edge_discr + + # all non-square grids or too large square grids will be + # discretized onto a regular grid with square or cubic pixel/voxel + for dim in dims: + if dim in src_grid.pos: + if src_grid.pos[dim].units != ureg.Quantity(1.0, ureg.micrometer).units: + raise ValueError(f"Gridding demands values in micrometer !") + + aabb = {} + for dim in dims: + aabb[f"{dim}"] = [ + np.min( + src_grid.pos[f"{dim}"].magnitude - 0.5 * src_grid.s[f"{dim}"].magnitude + ), + np.max( + src_grid.pos[f"{dim}"].magnitude + 0.5 * src_grid.s[f"{dim}"].magnitude + ), + ] + print(f"{aabb}") + + trg_s = {} + trg_n = {} + if src_grid.dimensionality == 1: + trg_s["x"] = (aabb["x"][1] - aabb["x"][0]) / max_extent + trg_n["x"] = max_extent + elif src_grid.dimensionality == 2: + if aabb["x"][1] - aabb["x"][0] >= aabb["y"][1] - aabb["y"][0]: + trg_s["x"] = (aabb["x"][1] - aabb["x"][0]) / max_extent + trg_s["y"] = (aabb["x"][1] - aabb["x"][0]) / max_extent + trg_n["x"] = max_extent + trg_n["y"] = int(np.ceil((aabb["y"][1] - aabb["y"][0]) / trg_s["x"])) + else: + trg_s["x"] = (aabb["y"][1] - aabb["y"][0]) / max_extent + trg_s["y"] = (aabb["y"][1] - aabb["y"][0]) / max_extent + trg_n["x"] = int(np.ceil((aabb["x"][1] - aabb["x"][0]) / trg_s["x"])) + trg_n["y"] = max_extent + elif src_grid.dimensionality == 3: + print("TODO !!!!") + + print(f"H5Web default plot generation") + for dim in dims: + print(f"src_s {dim}: {src_grid.s[dim]} >>>> trg_s {dim}: {trg_s[dim]}") + print(f"src_n {dim}: {src_grid.n[dim]} >>>> trg_n {dim}: {trg_n[dim]}") + # the above estimate is not exactly correct (may create a slight real space shift) + # of the EBSD map TODO:: regrid the real world axis-aligned bounding box aabb with + # a regular tiling of squares or hexagons + # https://stackoverflow.com/questions/18982650/differences-between-matlab-and-numpy-and-pythons-round-function + # MTex/Matlab round not exactly the same as numpy round but reasonably close + + # scan point positions were normalized by tech partner parsers such that they + # always build on pixel coordinates calibrated for step size not by giving absolute positions + # in the sample surface frame of reference as this is typically not yet consistently documented + # because we assume in addition that we always start at the top left corner the zeroth/first + # coordinate is always 0., 0. ! + if src_grid.dimensionality == 1: + trg_pos = np.column_stack( + np.linspace(0, trg_n["x"] - 1, num=trg_n["x"], endpoint=True) * trg_s["x"], + 1, + ) + elif src_grid.dimensionality == 2: + trg_pos = np.column_stack( + ( + np.tile( + np.linspace(0, trg_n["x"] - 1, num=trg_n["x"], endpoint=True) + * trg_s["x"], + trg_n["y"], + ), + np.repeat( + np.linspace(0, trg_n["y"] - 1, num=trg_n["y"], endpoint=True) + * trg_s["y"], + trg_n["x"], + ), + ) + ) + # TODO:: if scan_point_{dim} are calibrated this approach + # here would shift the origin to 0, 0 implicitly which may not be desired + if src_grid.dimensionality == 1: + tree = KDTree(np.column_stack((src_grid.pos["x"]))) + elif src_grid.dimensionality == 2: + tree = KDTree(np.column_stack((src_grid.pos["x"], src_grid.pos["y"]))) + d, idx = tree.query(trg_pos, k=1) + if np.sum(idx == tree.n) > 0: + raise ValueError(f"kdtree query left some query points without a neighbor!") + del d + del tree + + # rebuild src_grid container with only the relevant src_grid selected from src_grid + trg_grid = EbsdPointCloud() + trg_grid.dimensionality = src_grid.dimensionality + + for dim_idx, dim in enumerate(dims): + trg_grid.s[dim] = ureg.Quantity(trg_s[dim], ureg.micrometer) + trg_grid.n[dim] = trg_n[dim] + trg_grid.pos[dim] = ureg.Quantity( + np.asarray(trg_pos[dim_idx], np.float64), ureg.micrometer + ) + if hasattr(src_grid, "euler"): + trg_grid.euler = np.empty((np.shape(trg_pos)[0], 3), np.float32) + trg_grid.euler.fill(np.nan) + trg_grid.euler = ureg.Quantity( + np.asarray(src_grid.euler.magnitude[idx, :], np.float32), ureg.radian + ) + if np.isnan(trg_grid.euler).any(): + raise ValueError(f"Gridding left scan points with incorrect euler !") + if hasattr(src_grid, "phase_id"): + trg_grid.phase_id = np.empty((np.shape(trg_pos)[0],), np.int32) + trg_grid.phase_id.fill(np.int32(-2)) + # pyxem_id phase_id are at least as large -1 + trg_grid.phase_id = np.asarray(src_grid.phase_id[idx], np.int32) + if np.sum(trg_grid.phase_id == -2) > 0: + raise ValueError(f"Gridding left scan points with incorrect phase_id !") + if src_grid.descr_type == "band_contrast": + # bc typically positive + trg_grid.descr_type = "band_contrast" + trg_grid.descr_value = np.empty((np.shape(trg_pos)[0]), np.uint32) + trg_grid.descr_value.fill(np.uint32(-1)) + trg_grid.descr_value = ureg.Quantity( + np.asarray(src_grid.descr_value[idx], np.uint32) + ) + elif src_grid.descr_type == "confidence_index": + trg_grid.descr_type = "confidence_index" + trg_grid.descr_value = np.empty((np.shape(trg_pos)[0]), np.float32) + trg_grid.descr_value.fill(np.nan) + trg_grid.descr_value = ureg.Quantity( + np.asarray(src_grid.descr_value[idx], np.float32) + ) + elif src_grid.descr_type == "mean_angular_deviation": + trg_grid.descr_type = "mean_angular_deviation" + trg_grid.descr_value = np.empty((np.shape(trg_pos)[0]), np.float32) + trg_grid.descr_value.fill(np.nan) + trg_grid.descr_value = ureg.Quantity( + np.asarray(src_grid.descr_value[idx], np.float32), ureg.radian + ) + else: + trg_grid.descr_type = None + trg_grid.descr_value = None + trg_grid.phase = src_grid.phase + trg_grid.space_group = src_grid.space_group + trg_grid.phases = src_grid.phases + return trg_grid + + +def ebsd_roi_overview(inp: EbsdPointCloud, id_mgn: dict, template: dict) -> dict: """Create an H5Web-capable ENTRY[entry]/ROI[roi*]/ebsd/indexing/roi.""" # EBSD maps are collected using different scan strategies but RDMs may not be # able to show all of them at the provided size and grid type @@ -106,90 +303,63 @@ def ebsd_roi_overview(inp: dict, id_mgn: dict, template: dict) -> dict: # and regridding on a square_grid with the maximum resolution supported by H5Web # this is never an upsampled but may represent a downsampled representation of the # actual ROI using some regridding scheme - trg_grid = regrid_onto_equisized_scan_points(inp, HFIVE_WEB_MAXIMUM_ROI) - - contrast_modes = [ - (None, "n/a"), - ("bc", "normalized_band_contrast"), - ("ci", "normalized_confidence_index"), - ("mad", "normalized_mean_angular_deviation"), - ] - contrast_mode = None - for mode in contrast_modes: - if mode[0] in trg_grid and contrast_mode is None: - contrast_mode = mode - break - if contrast_mode is None: - print( - f"ebsd_roi_overview no plot entry{id_mgn['entry_id']} roi{id_mgn['roi_id']} !" - ) + if inp.dimensionality not in [1, 2, 3]: + return template + trg_grid: EbsdPointCloud = regrid_onto_equisized_scan_points( + inp, HFIVE_WEB_MAXIMUM_ROI + ) + if not trg_grid.descr_type: return template trg = f"/ENTRY[entry{id_mgn['entry_id']}]/ROI[roi{id_mgn['roi_id']}]/ebsd/indexing/roi" - template[f"{trg}/title"] = f"Region-of-interest overview image" + template[f"{trg}/descriptor"] = trg_grid.descr_type + template[f"{trg}/title"] = ( + f"Region-of-interest overview image ({trg_grid.descr_type})" + ) + template[f"{trg}/data/@long_name"] = f"Signal" + template[f"{trg}/@signal"] = "data" - dims = ["x", "y"] - if trg_grid["dimensionality"] == 3: - dims.append("z") - idx = 0 - for dim in dims: + dims = ["x", "y", "z"][0 : trg_grid.dimensionality] + for idx, dim in enumerate(dims): template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx) template[f"{trg}/AXISNAME[axis_{dim}]"] = { - "compress": get_named_axis(trg_grid, dim), + "compress": np.asarray( + np.linspace(0, trg_grid.n[dim] - 1, num=trg_grid.n[dim], endpoint=True) + * trg_grid.s[dim] + ), "strength": 1, } template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( - f"Coordinate along {dim}-axis ({trg_grid['s_{dim}'].units})" + f"Point coordinate along {dim}-axis ({trg_grid.pos[dim].units})" ) - template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{trg_grid['s_{dim}'].units}" - idx += 1 + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{trg_grid.pos[dim].units}" template[f"{trg}/@axes"] = [] for dim in dims[::-1]: template[f"{trg}/@axes"].append(f"axis_{dim}") - if trg_grid["dimensionality"] == 2: + if trg_grid.dimensionality == 1: + template[f"{trg}/data"] = { + "compress": trg_grid.descr_value.magnitude, + "strength": 1, + } + elif trg_grid.dimensionality == 2: template[f"{trg}/data"] = { "compress": np.reshape( - np.asarray( - np.asarray( - ( - trg_grid[contrast_mode[0]] - / np.max(trg_grid[contrast_mode[0]]) - * 255.0 - ), - np.uint32, - ), - np.uint8, - ), - (trg_grid["n_y"], trg_grid["n_x"]), + np.asarray(trg_grid.descr_value.magnitude), + (trg_grid.n["y"], trg_grid.n["x"]), order="C", ), "strength": 1, } - elif trg_grid["dimensionality"] == 3: + else: # 3d template[f"{trg}/data"] = { - "compress": np.squeeze( - np.asarray( - np.asarray( - ( - trg_grid[contrast_mode[0]] - / np.max(trg_grid[contrast_mode[0]], axis=None) - * 255.0 - ), - np.uint32, - ), - np.uint8, - ), - axis=3, + "compress": np.reshape( + np.asarray(trg_grid.descr_value.magnitude), + (trg_grid.n["z"], trg_grid.n["y"], trg_grid.n["x"]), + order="C", ), "strength": 1, } - else: - raise ValueError(f"Hitting unimplemented case for ebsd_roi_overview !") - template[f"{trg}/descriptor"] = contrast_mode[1] - # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d - template[f"{trg}/data/@long_name"] = f"Signal" - hfive_web_decorate_nxdata(f"{trg}/data", template) return template @@ -392,3 +562,113 @@ def process_roi_phase_ipfs_twod( f"Pixel coordinate along {dim[0]}-axis" ) return template + + +# considered as deprecated + + +def get_scan_point_axis_values(inp: dict, dim_name: str): + is_threed = False + if "dimensionality" in inp.keys(): + if inp["dimensionality"] == 3: + is_threed = True + req_keys = ["grid_type", f"n_{dim_name}", f"s_{dim_name}"] + for key in req_keys: + if key not in inp.keys(): + raise ValueError(f"Unable to find required key {key} in inp !") + + if inp["grid_type"] in [HEXAGONAL_FLAT_TOP_TILING, SQUARE_TILING]: + return np.asarray( + np.linspace( + 0, inp[f"n_{dim_name}"] - 1, num=inp[f"n_{dim_name}"], endpoint=True + ) + * inp[f"s_{dim_name}"], + np.float32, + ) + else: + return None + + +def threed(inp: dict): + """Identify if 3D triboolean.""" + if "dimensionality" in inp.keys(): + if inp["dimensionality"] == 3: + return True + return False + return None + + +def square_grid(inp: dict): + """Identify if square grid with specific assumptions.""" + if ( + inp["grid_type"] == SQUARE_TILING + and inp["tiling"] == REGULAR_TILING + and inp["flight_plan"] == FLIGHT_PLAN + ): + return True + return False + + +def hexagonal_grid(inp: dict): + """Identify if square grid with specific assumptions.""" + if ( + inp["grid_type"] == HEXAGONAL_FLAT_TOP_TILING + and inp["tiling"] == REGULAR_TILING + and inp["flight_plan"] == FLIGHT_PLAN + ): + return True + return False + + +def get_scan_point_coords(inp: dict) -> dict: + """Add scan_point_dim array assuming top-left to bottom-right snake style scanning.""" + is_threed = threed(inp) + req_keys = ["grid_type", "tiling", "flight_plan"] + dims = ["x", "y"] + if is_threed is True: + dims.append("z") + for dim in dims: + req_keys.append(f"n_{dim}") + req_keys.append(f"s_{dim}") + + for key in req_keys: + if key not in inp.keys(): + raise ValueError(f"Unable to find required key {key} in inp !") + + if is_threed is False: + if square_grid(inp) is True: + for dim in dims: + if "scan_point_{dim}" in inp.keys(): + print("WARNING::Overwriting scan_point_{dim} !") + inp["scan_point_x"] = np.tile( + np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) + * inp["s_x"], + inp["n_y"], + ) + inp["scan_point_y"] = np.repeat( + np.linspace(0, inp["n_y"] - 1, num=inp["n_y"], endpoint=True) + * inp["s_y"], + inp["n_x"], + ) + elif hexagonal_grid(inp) is True: + for dim in dims: + if "scan_point_{dim}" in inp.keys(): + print("WARNING::Overwriting scan_point_{dim} !") + # the following code is only the same as for the sqrgrid because + # typically the tech partners already take into account and export scan step + # values such that for a hexagonal grid one s_{dim} (typically s_y) is sqrt(3)/2*s_{other_dim} ! + inp["scan_point_x"] = np.tile( + np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) + * inp["s_x"], + inp["n_y"], + ) + inp["scan_point_y"] = np.repeat( + np.linspace(0, inp["n_y"] - 1, num=inp["n_y"], endpoint=True) + * inp["s_y"], + inp["n_x"], + ) + else: + print("WARNING::{__name__} facing an unknown scan strategy !") + else: + print("WARNING::{__name__} not implemented for 3D case !") + return inp diff --git a/src/pynxtools_em/parsers/hfive_apex.py b/src/pynxtools_em/parsers/hfive_apex.py index 99cc5ef..aef35a4 100644 --- a/src/pynxtools_em/parsers/hfive_apex.py +++ b/src/pynxtools_em/parsers/hfive_apex.py @@ -27,15 +27,8 @@ from pynxtools_em.concepts.nxs_image_set import NxImageRealSpaceSet from pynxtools_em.concepts.nxs_object import NxObject from pynxtools_em.concepts.nxs_spectrum_set import NxSpectrumSet -from pynxtools_em.examples.ebsd_database import ( - ASSUME_PHASE_NAME_TO_SPACE_GROUP, - FLIGHT_PLAN, - HEXAGONAL_FLAT_TOP_TILING, - REGULAR_TILING, - SQUARE_TILING, -) +from pynxtools_em.examples.ebsd_database import ASSUME_PHASE_NAME_TO_SPACE_GROUP from pynxtools_em.parsers.hfive_base import HdfFiveBaseParser -from pynxtools_em.utils.get_scan_points import get_scan_point_coords from pynxtools_em.utils.get_xrayline_iupac_names import get_xrayline_candidates from pynxtools_em.utils.hfive_utils import read_strings diff --git a/src/pynxtools_em/parsers/hfive_bruker.py b/src/pynxtools_em/parsers/hfive_bruker.py index 2b15e97..f1ed983 100644 --- a/src/pynxtools_em/parsers/hfive_bruker.py +++ b/src/pynxtools_em/parsers/hfive_bruker.py @@ -22,16 +22,10 @@ import h5py import numpy as np from diffpy.structure import Lattice, Structure -from pynxtools_em.examples.ebsd_database import ( - ASSUME_PHASE_NAME_TO_SPACE_GROUP, - FLIGHT_PLAN, - REGULAR_TILING, - SQUARE_TILING, -) +from pynxtools_em.examples.ebsd_database import ASSUME_PHASE_NAME_TO_SPACE_GROUP from pynxtools_em.parsers.hfive_base import HdfFiveBaseParser # HEXAGONAL_GRID -from pynxtools_em.utils.get_scan_points import get_scan_point_coords from pynxtools_em.utils.hfive_utils import ( EBSD_MAP_SPACEGROUP, all_equal, diff --git a/src/pynxtools_em/parsers/hfive_ebsd.py b/src/pynxtools_em/parsers/hfive_ebsd.py index fb73421..b961e61 100644 --- a/src/pynxtools_em/parsers/hfive_ebsd.py +++ b/src/pynxtools_em/parsers/hfive_ebsd.py @@ -31,7 +31,6 @@ from pynxtools_em.parsers.hfive_base import HdfFiveBaseParser # HEXAGONAL_GRID -from pynxtools_em.utils.get_scan_points import get_scan_point_coords from pynxtools_em.utils.hfive_utils import ( EBSD_MAP_SPACEGROUP, all_equal, diff --git a/src/pynxtools_em/parsers/hfive_oxford.py b/src/pynxtools_em/parsers/hfive_oxford.py index ecbbf63..32be0a6 100644 --- a/src/pynxtools_em/parsers/hfive_oxford.py +++ b/src/pynxtools_em/parsers/hfive_oxford.py @@ -17,18 +17,16 @@ # """Parser mapping concepts and content from Oxford Instruments *.h5oina files on NXem.""" -import mmap from typing import Dict import h5py import numpy as np from diffpy.structure import Lattice, Structure -from pynxtools_em.examples.ebsd_database import ( +from pynxtools_em.methods.ebsd import ( FLIGHT_PLAN, REGULAR_TILING, SQUARE_TILING, -) -from pynxtools_em.methods.ebsd import ( + EbsdPointCloud, ebsd_roi_overview, ebsd_roi_phase_ipf, has_hfive_magic_header, @@ -46,7 +44,6 @@ def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False self.id_mgn: Dict[str, int] = {"entry_id": entry_id, "roi_id": 1} self.verbose = verbose self.prfx = None # template path handling - self.tmp = {} self.version: Dict = { # Dict[str, Dict[str, List[str]]] "trg": { "tech_partner": ["Oxford Instruments"], @@ -121,43 +118,38 @@ def parse(self, template: dict) -> dict: if self.supported: print(f"Parsing via Oxford Instrument HDF5/H5OINA parser...") with h5py.File(f"{self.file_path}", "r") as h5r: - cache_id = 1 slice_ids = sorted(list(h5r["/"])) for slice_id in slice_ids: if slice_id == "1" and f"/{slice_id}/EBSD" in h5r: # non-negative int, parse for now only the first slice self.prfx = f"/{slice_id}" - ckey = self.init_cache(f"ebsd{cache_id}") - self.parse_and_normalize_slice_ebsd_header(h5r, ckey) - self.parse_and_normalize_slice_ebsd_phases(h5r, ckey) - self.parse_and_normalize_slice_ebsd_data(h5r, ckey) - ebsd_roi_overview(self.tmp[ckey], self.id_mgn, template) - ebsd_roi_phase_ipf(self.tmp[ckey], self.id_mgn, template) - self.clear_cache(ckey) + self.ebsd = EbsdPointCloud() + self.parse_and_normalize_slice_ebsd_header(h5r) + self.parse_and_normalize_slice_ebsd_phases(h5r) + self.parse_and_normalize_slice_ebsd_data(h5r) + ebsd_roi_overview(self.ebsd, self.id_mgn, template) + # ebsd_roi_phase_ipf(self.ebsd, self.id_mgn, template) # TODO:Vitesh example return template - def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): + def parse_and_normalize_slice_ebsd_header(self, fp): grp_name = f"{self.prfx}/EBSD/Header" if f"{grp_name}" not in fp: print(f"Unable to parse {grp_name} !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return - # TODO::check if Oxford Instruments always uses SquareGrid like assumed here - self.tmp[ckey]["dimensionality"] = 2 - self.tmp[ckey]["grid_type"] = SQUARE_TILING + # self.grid_type TODO::check if Oxford Instruments always uses SquareGrid like assumed here + self.ebsd.dimensionality = 2 # the next two lines encode the typical assumption that is not reported in tech partner file! - self.tmp[ckey]["tiling"] = REGULAR_TILING - self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN dims = ["X", "Y"] for dim in dims: for req_field in [f"{dim} Cells", f"{dim} Step"]: if f"{grp_name}/{req_field}" not in fp: print(f"Unable to parse {grp_name}/{req_field} !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # X Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Width in pixels, Line scan: Length in pixels. # Y Cells, yes, H5T_NATIVE_INT32, (1, 1), Map: Height in pixels. Line scan: Always set to 1. @@ -166,36 +158,34 @@ def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str): # Y Step, yes, H5T_NATIVE_FLOAT, (1, 1), Map: Step size along y-axis in micrometers. # Line scan: Always set to 0. for dim in dims: - self.tmp[ckey][f"n_{dim.lower()}"] = fp[f"{grp_name}/{dim} Cells"][0] + self.ebsd.n[f"{dim.lower()}"] = fp[f"{grp_name}/{dim} Cells"][0] if read_strings(fp[f"{grp_name}/{dim} Step"].attrs["Unit"]) == "um": - self.tmp[ckey][f"s_{dim.lower()}"] = ureg.Quantity( + self.ebsd.s[f"{dim.lower()}"] = ureg.Quantity( fp[f"{grp_name}/{dim} Step"][0], ureg.micrometer ) else: print(f"Unexpected {dim} Step Unit attribute !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # TODO::check that all data in the self.oina are consistent - def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str): + def parse_and_normalize_slice_ebsd_phases(self, fp): """Parse EBSD header section for specific slice.""" grp_name = f"{self.prfx}/EBSD/Header/Phases" if f"{grp_name}" not in fp: print(f"Unable to parse {grp_name} !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # Phases, yes, contains a subgroup for each phase where the name # of each subgroup is the index of the phase starting at 1. phase_ids = sorted(list(fp[f"{grp_name}"]), key=int) - self.tmp[ckey]["phase"] = [] - self.tmp[ckey]["space_group"] = [] - self.tmp[ckey]["phases"] = {} for phase_id in phase_ids: if not phase_id.isdigit(): continue - self.tmp[ckey]["phases"][int(phase_id)] = {} - sub_grp_name = f"/{grp_name}/{phase_id}" + phase_idx = int(phase_id) + self.ebsd.phases[phase_idx] = {} + sub_grp_name = f"{grp_name}/{phase_id}" req_fields = [ "Phase Name", @@ -207,15 +197,15 @@ def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str): for req_field in req_fields: if f"{sub_grp_name}/{req_field}" not in fp: print(f"Unable to parse {sub_grp_name}/{req_field} !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # Phase Name, yes, H5T_STRING, (1, 1) phase_name = read_strings(fp[f"{sub_grp_name}/Phase Name"][()]) - self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name + self.ebsd.phases[phase_idx]["phase_name"] = phase_name # Reference, yes, H5T_STRING, (1, 1), Changed in version 2.0 to mandatory - self.tmp[ckey]["phases"][int(phase_id)]["reference"] = read_strings( + self.ebsd.phases[phase_idx]["reference"] = read_strings( fp[f"{sub_grp_name}/Reference"][()] ) @@ -224,75 +214,69 @@ def parse_and_normalize_slice_ebsd_phases(self, fp, ckey: str): read_strings(fp[f"{sub_grp_name}/Lattice Angles"].attrs["Unit"]) == "rad" ): - alpha_beta_gamma = np.asarray( - fp[f"{sub_grp_name}/Lattice Angles"][:].flatten() - ) - self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] = ( - ureg.Quantity(alpha_beta_gamma, ureg.radian) + angles = np.asarray(fp[f"{sub_grp_name}/Lattice Angles"][:].flatten()) + self.ebsd.phases[phase_idx]["alpha_beta_gamma"] = ureg.Quantity( + angles, ureg.radian ) else: print(f"Unexpected case that Lattice Angles are not reported in rad !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # Lattice Dimensions, yes, H5T_NATIVE_FLOAT, (1, 3), Three columns for a, b and c dimensions in Angstroms if ( read_strings(fp[f"{sub_grp_name}/Lattice Dimensions"].attrs["Unit"]) == "angstrom" ): - a_b_c = np.asarray( - fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten() - ) - self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] = ureg.Quantity( - a_b_c, ureg.angstrom - ) + abc = np.asarray(fp[f"{sub_grp_name}/Lattice Dimensions"][:].flatten()) + self.ebsd.phases[phase_idx]["a_b_c"] = ureg.Quantity(abc, ureg.angstrom) else: print( f"Unexpected case that Lattice Dimensions are not reported in angstrom !" ) - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # Space Group, no, H5T_NATIVE_INT32, (1, 1), Space group index. # The attribute Symbol contains the string representation, for example P m -3 m. space_group = int(fp[f"{sub_grp_name}/Space Group"][0]) - self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group - if len(self.tmp[ckey]["space_group"]) > 0: - self.tmp[ckey]["space_group"].append(space_group) + self.ebsd.phases[phase_idx]["space_group"] = space_group + if len(self.ebsd.space_group) > 0: + self.ebsd.space_group.append(space_group) else: - self.tmp[ckey]["space_group"] = [space_group] + self.ebsd.space_group = [space_group] - if len(self.tmp[ckey]["phase"]) > 0: - self.tmp[ckey]["phase"].append( + if len(self.ebsd.phase) > 0: + self.ebsd.phase.append( Structure( title=phase_name, atoms=None, lattice=Lattice( - a_b_c[0], - a_b_c[1], - a_b_c[2], - alpha_beta_gamma[0], - alpha_beta_gamma[1], - alpha_beta_gamma[2], + abc[0], + abc[1], + abc[2], + angles[0], + angles[1], + angles[2], ), ) ) else: - self.tmp[ckey]["phase"] = [ + self.ebsd.phase = [ Structure( title=phase_name, atoms=None, lattice=Lattice( - a_b_c[0], - a_b_c[1], - a_b_c[2], - alpha_beta_gamma[0], - alpha_beta_gamma[1], - alpha_beta_gamma[2], + abc[0], + abc[1], + abc[2], + angles[0], + angles[1], + angles[2], ), ) ] - def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): + def parse_and_normalize_slice_ebsd_data(self, fp): # https://github.com/oinanoanalysis/h5oina/blob/master/H5OINAFile.md grp_name = f"{self.prfx}/EBSD/Data" if f"{grp_name}" not in fp: @@ -302,32 +286,28 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): for req_field in req_fields: if f"{grp_name}/{req_field}" not in fp: print(f"Unable to parse {grp_name}/{req_field} !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return # Euler, yes, H5T_NATIVE_FLOAT, (size, 3), Orientation of Crystal (CS2) to Sample-Surface (CS1). if read_strings(fp[f"{grp_name}/Euler"].attrs["Unit"]) == "rad": - self.tmp[ckey]["euler"] = ureg.Quantity( + self.ebsd.euler = ureg.Quantity( np.asarray(fp[f"{grp_name}/Euler"]), ureg.radian ) else: print(f"Unexpected case that Euler angle are not reported in rad !") - self.tmp[ckey] = {} + self.ebsd = EbsdPointCloud() return - self.tmp[ckey]["euler"] = apply_euler_space_symmetry(self.tmp[ckey]["euler"]) + self.ebsd.euler = apply_euler_space_symmetry(self.ebsd.euler) # no normalization needed, also in NXem the null model notIndexed is phase_identifier 0 - self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32) + self.ebsd.phase_id = np.asarray(fp[f"{grp_name}/Phase"], np.int32) # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere # expected is order on x is first all possible x values while y == 0 # followed by as many copies of this linear sequence for each y increment # no action needed Oxford reports already the pixel coordinate multiplied by step - if self.tmp[ckey]["grid_type"] != SQUARE_TILING: - print( - f"WARNING: Check carefully correct interpretation of scan_point coords!" - ) # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner) # Y, no, H5T_NATIVE_FLOAT, (size, 1), Y position of each pixel in micrometers (origin: top left corner) @@ -336,10 +316,11 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str): # inconsistency f32 in file although specification states float dims = ["X", "Y"] for dim in dims: - self.tmp[ckey][f"scan_point_{dim.lower()}"] = np.asarray( - fp[f"{grp_name}/{dim}"] + self.ebsd.pos[f"{dim.lower()}"] = ureg.Quantity( + np.asarray(fp[f"{grp_name}/{dim}"]), ureg.micrometer ) - self.tmp[ckey]["bc"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32) + self.ebsd.descr_type = "band_contrast" + self.ebsd.descr_value = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32) # inconsistency uint8 in file although specification states should be int32 # promoting uint8 to int32 no problem diff --git a/src/pynxtools_em/reader.py b/src/pynxtools_em/reader.py index 4e01628..a0e85c3 100644 --- a/src/pynxtools_em/reader.py +++ b/src/pynxtools_em/reader.py @@ -24,14 +24,14 @@ from pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools_em.concepts.nxs_concepts import NxEmAppDef -from pynxtools_em.methods.eds import NxEmNxsPyxemParser from pynxtools_em.parsers.conventions import NxEmConventionParser -from pynxtools_em.parsers.hfive_apex import HdfFiveEdaxApexParser -from pynxtools_em.parsers.hfive_bruker import HdfFiveBrukerEspritParser -from pynxtools_em.parsers.hfive_dreamthreed import HdfFiveDreamThreedParser -from pynxtools_em.parsers.hfive_ebsd import HdfFiveEbsdCommunityParser -from pynxtools_em.parsers.hfive_edax import HdfFiveEdaxOimAnalysisParser -from pynxtools_em.parsers.hfive_emsoft import HdfFiveEmSoftParser + +# from pynxtools_em.parsers.hfive_apex import HdfFiveEdaxApexParser +# from pynxtools_em.parsers.hfive_bruker import HdfFiveBrukerEspritParser +# from pynxtools_em.parsers.hfive_dreamthreed import HdfFiveDreamThreedParser +# from pynxtools_em.parsers.hfive_ebsd import HdfFiveEbsdCommunityParser +# from pynxtools_em.parsers.hfive_edax import HdfFiveEdaxOimAnalysisParser +# from pynxtools_em.parsers.hfive_emsoft import HdfFiveEmSoftParser from pynxtools_em.parsers.hfive_oxford import HdfFiveOxfordInstrumentsParser from pynxtools_em.parsers.image_png_protochips import ProtochipsPngSetParser from pynxtools_em.parsers.image_tiff_hitachi import HitachiTiffParser diff --git a/src/pynxtools_em/utils/get_scan_points.py b/src/pynxtools_em/utils/get_scan_points.py deleted file mode 100644 index 5575e4a..0000000 --- a/src/pynxtools_em/utils/get_scan_points.py +++ /dev/null @@ -1,134 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Identify likely scan_point_positions for specific EBSD grid types.""" - -import numpy as np - -from pynxtools_em.examples.ebsd_database import ( - FLIGHT_PLAN, - HEXAGONAL_FLAT_TOP_TILING, - REGULAR_TILING, - SQUARE_TILING, -) - - -def get_scan_point_axis_values(inp: dict, dim_name: str): - is_threed = False - if "dimensionality" in inp.keys(): - if inp["dimensionality"] == 3: - is_threed = True - req_keys = ["grid_type", f"n_{dim_name}", f"s_{dim_name}"] - for key in req_keys: - if key not in inp.keys(): - raise ValueError(f"Unable to find required key {key} in inp !") - - if inp["grid_type"] in [HEXAGONAL_FLAT_TOP_TILING, SQUARE_TILING]: - return np.asarray( - np.linspace( - 0, inp[f"n_{dim_name}"] - 1, num=inp[f"n_{dim_name}"], endpoint=True - ) - * inp[f"s_{dim_name}"], - np.float32, - ) - else: - return None - - -def threed(inp: dict): - """Identify if 3D triboolean.""" - if "dimensionality" in inp.keys(): - if inp["dimensionality"] == 3: - return True - return False - return None - - -def square_grid(inp: dict): - """Identify if square grid with specific assumptions.""" - if ( - inp["grid_type"] == SQUARE_TILING - and inp["tiling"] == REGULAR_TILING - and inp["flight_plan"] == FLIGHT_PLAN - ): - return True - return False - - -def hexagonal_grid(inp: dict): - """Identify if square grid with specific assumptions.""" - if ( - inp["grid_type"] == HEXAGONAL_FLAT_TOP_TILING - and inp["tiling"] == REGULAR_TILING - and inp["flight_plan"] == FLIGHT_PLAN - ): - return True - return False - - -def get_scan_point_coords(inp: dict) -> dict: - """Add scan_point_dim array assuming top-left to bottom-right snake style scanning.""" - is_threed = threed(inp) - req_keys = ["grid_type", "tiling", "flight_plan"] - dims = ["x", "y"] - if is_threed is True: - dims.append("z") - for dim in dims: - req_keys.append(f"n_{dim}") - req_keys.append(f"s_{dim}") - - for key in req_keys: - if key not in inp.keys(): - raise ValueError(f"Unable to find required key {key} in inp !") - - if is_threed is False: - if square_grid(inp) is True: - for dim in dims: - if "scan_point_{dim}" in inp.keys(): - print("WARNING::Overwriting scan_point_{dim} !") - inp["scan_point_x"] = np.tile( - np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) - * inp["s_x"], - inp["n_y"], - ) - inp["scan_point_y"] = np.repeat( - np.linspace(0, inp["n_y"] - 1, num=inp["n_y"], endpoint=True) - * inp["s_y"], - inp["n_x"], - ) - elif hexagonal_grid(inp) is True: - for dim in dims: - if "scan_point_{dim}" in inp.keys(): - print("WARNING::Overwriting scan_point_{dim} !") - # the following code is only the same as for the sqrgrid because - # typically the tech partners already take into account and export scan step - # values such that for a hexagonal grid one s_{dim} (typically s_y) is sqrt(3)/2*s_{other_dim} ! - inp["scan_point_x"] = np.tile( - np.linspace(0, inp["n_x"] - 1, num=inp["n_x"], endpoint=True) - * inp["s_x"], - inp["n_y"], - ) - inp["scan_point_y"] = np.repeat( - np.linspace(0, inp["n_y"] - 1, num=inp["n_y"], endpoint=True) - * inp["s_y"], - inp["n_x"], - ) - else: - print("WARNING::{__name__} facing an unknown scan strategy !") - else: - print("WARNING::{__name__} not implemented for 3D case !") - return inp diff --git a/src/pynxtools_em/utils/get_sqr_grid.py b/src/pynxtools_em/utils/get_sqr_grid.py deleted file mode 100644 index 158539c..0000000 --- a/src/pynxtools_em/utils/get_sqr_grid.py +++ /dev/null @@ -1,183 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Discretize point cloud in R^d (d=2, 3) with mark data to grid with square/cube bins.""" - -import numpy as np -from pynxtools_em.utils.get_scan_points import hexagonal_grid, square_grid, threed -from scipy.spatial import KDTree - - -def regrid_onto_equisized_scan_points(src_grid: dict, max_edge_length: int) -> dict: - """Inspect grid_type, dimensionality, point locations, and mark src_grid, map then.""" - is_threed = threed(src_grid) - req_keys = ["grid_type", "tiling", "flight_plan"] - dims = ["x", "y"] - if is_threed is True: - dims.append("z") - for dim in dims: - req_keys.append(f"scan_point_{dim}") - req_keys.append(f"n_{dim}") - req_keys.append(f"s_{dim}") - - trg_grid = {} - for key in req_keys: - if key not in src_grid.keys(): - raise ValueError(f"Unable to find required key {key} in src_grid !") - - # take discretization of the source grid as a guide for the target_grid - # optimization possible if square grid and matching maximum_extent - - max_extent = None - if is_threed is False: - max_extent = np.max((src_grid["n_x"], src_grid["n_y"])) - else: - max_extent = np.max((src_grid["n_x"], src_grid["n_y"], src_grid["n_z"])) - - if square_grid(src_grid) is True: - if max_extent <= max_edge_length: - return src_grid - else: - # too large square grid has to be discretized and capped - # cap to the maximum extent to comply with h5web technical constraints - max_extent = max_edge_length - elif hexagonal_grid(src_grid) is True: - if max_extent > max_edge_length: - max_extent = max_edge_length - else: - raise ValueError(f"Facing an unsupported grid type !") - - # all non-square grids or too large square grids will be - # discretized onto a regular grid with square or cubic pixel/voxel - aabb = [] - for dim in dims: - aabb.append(np.min(src_grid[f"scan_point_{dim}"] - 0.5 * src_grid[f"s_{dim}"])) - aabb.append(np.max(src_grid[f"scan_point_{dim}"] + 0.5 * src_grid[f"s_{dim}"])) - print(f"{aabb}") - - if is_threed is False: - if aabb[1] - aabb[0] >= aabb[3] - aabb[2]: - trg_sxy = (aabb[1] - aabb[0]) / max_extent - trg_nxy = [max_extent, int(np.ceil((aabb[3] - aabb[2]) / trg_sxy))] - else: - trg_sxy = (aabb[3] - aabb[2]) / max_extent - trg_nxy = [int(np.ceil((aabb[1] - aabb[0]) / trg_sxy)), max_extent] - print( - f"H5Web default plot generation, scaling src_nxy " - f"{[src_grid['n_x'], src_grid['n_y']]}, trg_nxy {trg_nxy}" - ) - # the above estimate is not exactly correct (may create a slight real space shift) - # of the EBSD map TODO:: regrid the real world axis-aligned bounding box aabb with - # a regular tiling of squares or hexagons - # https://stackoverflow.com/questions/18982650/differences-between-matlab-and-numpy-and-pythons-round-function - # MTex/Matlab round not exactly the same as numpy round but reasonably close - - # scan point positions were normalized by tech partner parsers such that they - # always build on pixel coordinates calibrated for step size not by giving absolute positions - # in the sample surface frame of reference as this is typically not yet consistently documented - # because we assume in addition that we always start at the top left corner the zeroth/first - # coordinate is always 0., 0. ! - trg_xy = np.column_stack( - ( - np.tile( - np.linspace(0, trg_nxy[0] - 1, num=trg_nxy[0], endpoint=True) - * trg_sxy, - trg_nxy[1], - ), - np.repeat( - np.linspace(0, trg_nxy[1] - 1, num=trg_nxy[1], endpoint=True) - * trg_sxy, - trg_nxy[0], - ), - ) - ) - # TODO:: if scan_point_{dim} are calibrated this approach - # here would shift the origin to 0, 0 implicitly which may not be desired - print(f"trg_xy np.shape {np.shape(trg_xy)}") - tree = KDTree( - np.column_stack((src_grid["scan_point_x"], src_grid["scan_point_y"])) - ) - d, idx = tree.query(trg_xy, k=1) - if np.sum(idx == tree.n) > 0: - raise ValueError(f"kdtree query left some query points without a neighbor!") - del d - del tree - - # rebuild src_grid container with only the relevant src_grid selected from src_grid - for key in src_grid.keys(): - if key == "euler": - trg_grid[key] = np.empty((np.shape(trg_xy)[0], 3), np.float32) - trg_grid[key].fill(np.nan) - trg_grid[key] = src_grid["euler"][idx, :] - if np.isnan(trg_grid[key]).any() is True: - raise ValueError( - f"Downsampling of the point cloud left " - f"pixels without mark data {key} !" - ) - print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") - elif key == "phase_id" or key == "bc": - trg_grid[key] = np.empty((np.shape(trg_xy)[0],), np.int32) - trg_grid[key].fill(np.int32(-2)) - # pyxem_id is at least -1, bc is typically positive - trg_grid[key] = src_grid[key][idx] - if np.sum(trg_grid[key] == -2) > 0: - raise ValueError( - f"Downsampling of the point cloud left " - f"pixels without mark data {key} !" - ) - print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") - elif key == "ci" or key == "mad": - trg_grid[key] = np.empty((np.shape(trg_xy)[0],), np.float32) - trg_grid[key].fill(np.nan) - trg_grid[key] = src_grid[key][idx] - print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") - if np.isnan(trg_grid[key]).any() is True: - raise ValueError( - f"Downsampling of the point cloud left " - f"pixels without mark data {key} !" - ) - elif key not in [ - "n_x", - "n_y", - "n_z", - "s_x", - "s_y", - "s_z", - "scan_point_x", - "scan_point_y", - "scan_point_z", - ]: - trg_grid[key] = src_grid[key] - # print(f"WARNING:: src_grid[{key}] is mapped as is on trg_grid[{key}] !") - # print(f"final np.shape(trg_grid[{key}]) {np.shape(trg_grid[key])}") - # else: - # print( - # f"WARNING:: src_grid[{key}] is not yet mapped on trg_grid[{key}] !" - # ) - trg_grid["n_x"] = trg_nxy[0] - trg_grid["n_y"] = trg_nxy[1] - trg_grid["s_x"] = trg_sxy - trg_grid["s_y"] = trg_sxy - trg_grid["scan_point_x"] = trg_xy[0] - trg_grid["scan_point_y"] = trg_xy[1] - # TODO::need to update scan_point_{dim} - return trg_grid - else: - raise ValueError( - f"The 3D discretization is currently not implemented because " - f"we do not know of any large enough dataset to test it !" - ) diff --git a/src/pynxtools_em/utils/hfive_web_constants.py b/src/pynxtools_em/utils/hfive_web.py similarity index 97% rename from src/pynxtools_em/utils/hfive_web_constants.py rename to src/pynxtools_em/utils/hfive_web.py index 26553d1..9f0a129 100755 --- a/src/pynxtools_em/utils/hfive_web_constants.py +++ b/src/pynxtools_em/utils/hfive_web.py @@ -16,7 +16,5 @@ # """Constants relevant when working with H5Web.""" -import numpy as np - HFIVE_WEB_MAXIMUM_ROI = 2**14 - 1 HFIVE_WEB_MAXIMUM_RGB = 2**11 - 1