Skip to content

Commit

Permalink
Adds total spectra extraction (#35)
Browse files Browse the repository at this point in the history
* adds total spectra extraction

* adds test for get_total_spectra

* fixes storage request size for total spectra array

* Fix test for get_total_spectra

* Default spectra extraction to a per-pixel level

* Make sure every pixel has the full TOF range of the panel

* Even more size saving by using np.uint8

* Add type annotation for range_pad in extract_total_spectra

* Spacing

* Store equivalent m/z values for each FOV for plotting purposes

* Clarify fov_files to fov_metadata, and name to fov_name

* Documentation fi

---------

Co-authored-by: alex-l-kong <alkong@ucdavis.edu>
Co-authored-by: alex-l-kong <31424707+alex-l-kong@users.noreply.github.com>
Co-authored-by: Alex Kong <alkong@stanford.edu>
  • Loading branch information
4 people authored Oct 8, 2024
1 parent b8782bf commit 5b81528
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 1 deletion.
72 changes: 72 additions & 0 deletions src/mibi_bin_tools/_extract_bin.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,75 @@ cdef MAXINDEX_t _extract_total_counts(const char* filename):
return counts


cdef _extract_total_spectra(const char* filename, DTYPE_t low_range, DTYPE_t high_range):
"""Extract total spectra from bin file
Args:
filename (const char*):
Name of bin file to extract
low_range (np.uint16_t):
The lowest time interval to consider
high_range (np.uint16_t):
The highest time interval to consider
"""
cdef DTYPE_t num_x, num_y, num_trig, num_frames, desc_len, trig, num_pulses, pulse, time
cdef MAXINDEX_t data_start, pix

# 10MB buffer
cdef MAXINDEX_t BUFFER_SIZE = 10 * 1024 * 1024
cdef char* file_buffer = <char*> malloc(BUFFER_SIZE * sizeof(char))
cdef MAXINDEX_t buffer_idx = 0

# open file
cdef FILE* fp
fp = fopen(filename, "rb")

# note, if cython has packed structs, this would be easier
# or even macros tbh
fseek(fp, 0x6, SEEK_SET)
fread(&num_x, sizeof(DTYPE_t), 1, fp)
fread(&num_y, sizeof(DTYPE_t), 1, fp)
fread(&num_trig, sizeof(DTYPE_t), 1, fp)
fread(&num_frames, sizeof(DTYPE_t), 1, fp)
fseek(fp, 0x2, SEEK_CUR)
fread(&desc_len, sizeof(DTYPE_t), 1, fp)

spectra_by_pixel = \
cvarray(
shape=(<MAXINDEX_t>(num_x) * <MAXINDEX_t>(num_y),
<MAXINDEX_t>(high_range) - <MAXINDEX_t>(low_range) + 1),
itemsize=sizeof(SMALL_t),
format='B'
)
cdef SMALL_t[:, :] spectra_by_pixel_view = spectra_by_pixel
spectra_by_pixel_view[:, :] = 0

data_start = \
<MAXINDEX_t>(num_x) * <MAXINDEX_t>(num_y) * <MAXINDEX_t>(num_frames) * 8 + desc_len + 0x12

fseek(fp, data_start, SEEK_SET)
fread(file_buffer, sizeof(char), BUFFER_SIZE, fp)
for pix in range(<MAXINDEX_t>(num_x) * <MAXINDEX_t>(num_y)):
for trig in range(num_trig):
_check_buffer_refill(fp, file_buffer, &buffer_idx, 0x8 * sizeof(char), BUFFER_SIZE)
memcpy(&num_pulses, file_buffer + buffer_idx + 0x6, sizeof(time))
buffer_idx += 0x8
for pulse in range(num_pulses):
_check_buffer_refill(fp, file_buffer, &buffer_idx, 0x5 * sizeof(char), BUFFER_SIZE)
memcpy(&time, file_buffer + buffer_idx, sizeof(time))
buffer_idx += 0x5

if time >= low_range and time <= high_range:
spectra_by_pixel_view[pix, <MAXINDEX_t>(time) - <MAXINDEX_t>(low_range)] += 1

fclose(fp)
free(file_buffer)

return np.asarray(spectra_by_pixel, dtype=np.uint8).reshape(
(<MAXINDEX_t>num_x, <MAXINDEX_t>num_y, <MAXINDEX_t>high_range - <MAXINDEX_t>low_range + 1)
)


def c_extract_bin(char* filename, DTYPE_t[:] low_range,
DTYPE_t[:] high_range, SMALL_t[:] calc_intensity):
return np.asarray(
Expand All @@ -437,3 +506,6 @@ def c_pulse_height_vs_positive_pixel(char* filename, DTYPE_t low_range, DTYPE_t
def c_total_counts(char* filename):
counts = _extract_total_counts(filename)
return int(counts)

def c_total_spectra(char* filename, DTYPE_t low_range, DTYPE_t high_range):
return _extract_total_spectra(filename, low_range, high_range)
83 changes: 83 additions & 0 deletions src/mibi_bin_tools/bin_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,27 @@ def _mass2tof(masses_arr: np.ndarray, mass_offset: float, mass_gain: float,
return (mass_gain * np.sqrt(masses_arr) + mass_offset) / time_res


def _tof2mass(tof_arr: np.ndarray, mass_offset: float, mass_gain: float,
time_res: float) -> np.ndarray:
"""Convert array of time of flight values to equivalent m/z
Args:
tof_arr (array_like):
Array of time of flight values
mass_offset (float):
Mass offset for parabolic transformation
mass_gain (float):
Mass gain for parabolic transformation
time_res (float):
Time resolution for scaling parabolic transformation
Returns:
array_like:
Array of m/z values; indicies paried to `tof_range`
"""
return (((time_res * tof_arr) - mass_offset) / mass_gain) ** 2


def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray,
time_res: float) -> None:
"""Converts and stores provided mass ranges as time of flight ranges within fov metadata
Expand Down Expand Up @@ -521,3 +542,65 @@ def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None)
outs = {name: _extract_bin.c_total_counts(bytes(bf, 'utf-8')) for name, bf in bin_files}

return outs


def get_total_spectra(data_dir: str, include_fovs: Union[List[str], None] = None,
panel_df: pd.DataFrame = None, range_pad: float = 0.5):
"""Retrieves total spectra for each field of view
Args:
data_dir (str | PathLike):
Directory containing bin files as well as accompanying json metadata files
include_fovs (List | None):
List of fovs to include. Includes all if None.
panel_df (pd.DataFrame | None):
If not None, get default callibration information
range_pad (float):
Mass padding below the lowest and highest masses to consider when binning.
The time-of-flight array go from TOF of (lowest mass - 0.5) to (highest_mass + 0.5).
Returns:
tuple (dict, dict, list):
dict of total spectra and the corresponding low and high ranges, with fov names as keys
"""
if range_pad < 0:
raise ValueError("range_pad must be >= 0")

fov_metadata = _find_bin_files(data_dir, include_fovs)
if panel_df is not None:
for fov_info in fov_metadata.values():
_fill_fov_metadata(data_dir, fov_info, panel_df, False, 500e-6)

bin_metadata = list(fov_metadata.items())

# TODO: this assumes the panel_df is sorted
lowest_mass = panel_df.loc[0, "Stop"] - range_pad
highest_mass = panel_df.loc[panel_df.shape[0] - 1, "Stop"] + range_pad

# store the spectra, as well as the time intervals for each FOV
spectra = {}
tof_interval = {}
for fov_name, fov_info in bin_metadata:
# compute the low and high boundaries, this will differ per FOV
mass_offset = fov_info["mass_offset"]
mass_gain = fov_info["mass_gain"]
tof_boundaries = _mass2tof(
np.array([lowest_mass, highest_mass]), mass_offset, mass_gain, 500e-6
).astype(np.uint16)

# set the boundaries
tof_interval[fov_name] = tof_boundaries

# extract the spectra on an individual basis per channel
spectra[fov_name] = _extract_bin.c_total_spectra(
bytes(os.path.join(data_dir, fov_info["bin"]), "utf-8"),
tof_boundaries[0],
tof_boundaries[1]
)

# generate equivalent m/z values
tof_arr = np.arange(tof_boundaries[0], tof_boundaries[1] + 1)
mass_arr = _tof2mass(tof_arr, mass_offset, mass_gain, 500e-6)
fov_info["mass_spectra_points"] = mass_arr

return spectra, tof_interval, fov_metadata
21 changes: 20 additions & 1 deletion tests/bin_files_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,4 +343,23 @@ def test_get_total_counts(test_dir, fov):
bytes(bf, 'utf-8'), np.array([0], np.uint16),
np.array([-1], dtype=np.uint16), np.array([False], dtype=np.uint8)
)
assert (total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :]))
assert total_counts['fov-1-scan-1'] == np.sum(total_ion_image[0, :, :, :])


@parametrize_with_cases('test_dir, fov', cases=FovMetadataTestFiles)
@parametrize_with_cases('panel', cases=FovMetadataTestPanels, has_tag='specified')
def test_get_total_spectra(test_dir, fov, panel):
# ensure range_pad is positive
with pytest.raises(ValueError):
bin_files.get_total_spectra(
test_dir,
[fov['json'].split('.')[0]],
panel,
range_pad=-0.1
)

bin_files.get_total_spectra(
test_dir,
[fov['json'].split('.')[0]],
panel
)

0 comments on commit 5b81528

Please sign in to comment.