Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utilize TSAI tiler to crop out FOVs #20

Merged
merged 28 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
19bdb99
Add internal functionality for segmenting out glycan cores
alex-l-kong Nov 16, 2023
f4b33cf
Format using pre-commit hooks
alex-l-kong Nov 17, 2023
2f98ec5
Include core cropping process in Jupyter notebook, and update glycan …
alex-l-kong Nov 17, 2023
d863f9a
Add commented line that user can run to crop out all cores on a TMA s…
alex-l-kong Nov 20, 2023
388e623
Add functionality to visualize the cropping mask after completion
alex-l-kong Nov 20, 2023
897dda9
Change cores_to_crop to include names that are more TSAI-friendly
alex-l-kong Nov 20, 2023
3e7d11d
PYCODESTYLE and add test in case certain cores don't map onto the poslog
alex-l-kong Nov 21, 2023
2c0ca43
Ensure line length is 100 like the other angelolab repos (not 110)
alex-l-kong Nov 21, 2023
6fe1ca5
Use rng fixture to generate random nums
alex-l-kong Nov 21, 2023
6cc4909
Simplify cropping logic for all cores, allow user to set cores_to_cro…
alex-l-kong Nov 21, 2023
9d8a6a6
Only read in the columns we need for region_core_info
alex-l-kong Nov 21, 2023
55ee0af
Update core cropping process to include mask generation prior
alex-l-kong Nov 27, 2023
765fd30
The TSAI tiler, unfortunately, does not support .tiff files
alex-l-kong Nov 28, 2023
a505f33
Add type annotations
alex-l-kong Nov 28, 2023
ba0052f
Update core cropping functionality to support a list of poslog files
alex-l-kong Nov 29, 2023
b2970cc
Ensure regex is R(\d+)X, not R(\d+)XY
alex-l-kong Nov 29, 2023
d32b133
Pre-generate all the individual crop masks, then load in as necessary
alex-l-kong Dec 5, 2023
2058c9e
Update the notebook with the new pre-generated mask workflow
alex-l-kong Dec 5, 2023
9ec12a9
Ensure masks are of dtype np.uint32
alex-l-kong Jan 17, 2024
bf8c32f
Ensure glycan_crop_save_dir is set as a pathlib.Path
alex-l-kong Jan 17, 2024
83e5fc6
Change from np.uint32 to np.uint8
alex-l-kong Jan 18, 2024
4baab51
Fix path specification for load_glycan_crop_masks
alex-l-kong Jan 23, 2024
0f916c3
Add core-cropping notebook (will be renamed)
alex-l-kong May 15, 2024
0cf5173
Don't include full metadata
alex-l-kong May 17, 2024
7d487f9
Add templates.json file for TSAI tiler
alex-l-kong May 17, 2024
4a6ad62
Document where to find the template.json file for TSAI tiler
alex-l-kong May 17, 2024
7540686
Map to closest core in case centroid in mapping dict was "missed" by …
alex-l-kong Aug 29, 2024
664d01b
Fix remaining tests
alex-l-kong Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 84 additions & 2 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""Shared Fixtures for tests."""

import json
from pathlib import Path
from typing import Generator
from typing import Generator, List

import numpy as np
import pandas as pd
import pytest
import skimage.io as io
import xarray as xr
from pyimzml.ImzMLParser import ImzMLParser
from pyimzml.ImzMLWriter import ImzMLWriter
Expand All @@ -26,7 +28,8 @@ def imz_data(tmp_path_factory: TempPathFactory, rng: np.random.Generator) -> Imz
img_dim: int = 10

# Generate random integers n for each coordinate (10 x 10). These will be used for creating
# random m/z and intensity values of length n. Lengths n are distributed along the standard gamma.
# random m/z and intensity values of length n.
# Lengths n are distributed along the standard gamma.
ns: np.ndarray = np.rint(rng.standard_gamma(shape=2.5, size=(img_dim**2)) * 100).astype(int)

# Generate random masses and sample different amounts of them, so we get duplicates
Expand Down Expand Up @@ -114,3 +117,82 @@ def image_xr(rng: np.random.Generator, library: pd.DataFrame) -> Generator[xr.Da
dims=["peak", "x", "y"],
)
yield img_xr


@pytest.fixture(scope="session")
def glycan_img_path(
    tmp_path_factory: TempPathFactory, imz_data: ImzMLParser, rng: np.random.Generator
):
    """Write a random 10 x 10 glycan image to a temporary .tiff file and yield its path.

    One random intensity is drawn per entry of ``imz_data.coordinates``. Each (x, y) pair is
    decremented by one before being used as a (column, row) index into the image.
    """
    xy: np.ndarray = np.array([c[:2] for c in imz_data.coordinates])
    col_idx: np.ndarray = xy[:, 0] - 1
    row_idx: np.ndarray = xy[:, 1] - 1

    img: np.ndarray = np.zeros((10, 10))
    img[row_idx, col_idx] = rng.random(xy.shape[0])

    out_dir: Path = tmp_path_factory.mktemp("glycan_imgs")
    out_file: Path = out_dir / "glycan_img.tiff"
    io.imsave(out_file, img)

    yield out_file


@pytest.fixture(scope="session")
def poslog_path(
    tmp_path_factory: TempPathFactory, imz_data: ImzMLParser, rng: np.random.Generator
) -> Generator[Path, None, None]:
    """Write a synthetic poslog file to a temporary directory and yield its path.

    The file is space-delimited, has no header row, and contains two regions (``R0XY`` and
    ``R1XY``). Each region is preceded by one row whose ``Region`` value is ``"__"``. The
    coordinates in ``imz_data`` are split evenly across the two regions, so the number of
    region rows follows the size of ``imz_data`` rather than a hard-coded count.

    Args:
    ---
        tmp_path_factory (TempPathFactory): Pytest factory used to create the output folder.
        imz_data (ImzMLParser): Supplies the number of coordinates the poslog has to cover.
        rng (np.random.Generator): Shared random generator, used to fill the numeric columns.

    Yields:
    ------
        Path:
            The location of the written ``poslog.txt``.
    """
    columns_write: List[str] = ["Date", "Time", "Region", "PosX", "PosY", "X", "Y", "Z"]
    num_regions: int = 2
    points_per_region: int = len(imz_data.coordinates) // num_regions

    # Build the Region column first so the frame is created with a matching number of rows.
    poslog_regions: List[str] = []
    for i in range(num_regions):
        poslog_regions.append("__")
        poslog_regions.extend([f"R{i}XY"] * points_per_region)

    # Draw from the shared rng fixture (not the global numpy state) so runs are reproducible.
    poslog_data: pd.DataFrame = pd.DataFrame(
        rng.random((len(poslog_regions), len(columns_write))), columns=columns_write
    )
    poslog_data["Region"] = poslog_regions

    poslog_file: Path = tmp_path_factory.mktemp("poslogs") / "poslog.txt"
    poslog_data.to_csv(
        poslog_file, header=False, index=False, sep=" ", mode="w", columns=columns_write
    )

    yield poslog_file


@pytest.fixture(scope="session")
def centroid_path(tmp_path_factory: TempPathFactory, imz_data: ImzMLParser):
    """Write a centroid JSON file to a temporary directory and yield its path.

    Starting at index 25, every 50th entry of ``imz_data.coordinates`` is taken as the center
    point of a core. The cores are named ``Region0``, ``Region1``, ... in that order.
    """
    xy: np.ndarray = np.array([c[:2] for c in imz_data.coordinates])

    fovs: list = [
        {
            "name": f"Region{region_num}",
            "centerPointPixels": {"x": xy[idx, 0].item(), "y": xy[idx, 1].item()},
        }
        for region_num, idx in enumerate(range(25, xy.shape[0], 50))
    ]
    payload: dict = {"exportDateTime": None, "fovs": fovs}

    out_file: Path = tmp_path_factory.mktemp("centroids") / "centroids.json"
    with open(out_file, "w") as handle:
        json.dump(payload, handle)

    yield out_file


@pytest.fixture(scope="session")
def bad_centroid_path(tmp_path_factory: TempPathFactory, imz_data: ImzMLParser):
    """Write a centroid JSON file with displaced center points and yield its path.

    The center points are chosen as every 50th entry of ``imz_data.coordinates`` starting at
    index 25, then shifted by 10000 in both x and y (presumably so that no centroid
    coincides with a scanned coordinate).
    """
    offset = 10000
    xy: np.ndarray = np.array([c[:2] for c in imz_data.coordinates])
    center_rows: np.ndarray = xy[np.arange(25, xy.shape[0], 50)]

    fovs: list = []
    for region_num, (x_val, y_val) in enumerate(center_rows):
        shifted_center = {"x": x_val.item() + offset, "y": y_val.item() + offset}
        fovs.append({"name": f"Region{region_num}", "centerPointPixels": shifted_center})

    payload: dict = {"exportDateTime": None, "fovs": fovs}

    out_file: Path = tmp_path_factory.mktemp("centroids") / "centroids.json"
    with open(out_file, "w") as handle:
        handle.write(json.dumps(payload))

    yield out_file
92 changes: 91 additions & 1 deletion src/maldi_tools/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@

"""

import json
import os
from functools import partial
from operator import itemgetter
from pathlib import Path
from typing import Dict, Tuple
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import xarray as xr
from alpineer.io_utils import validate_paths
from pyimzml.ImzMLParser import ImzMLParser
from scipy import signal
from skimage.io import imread
from tqdm.notebook import tqdm

from maldi_tools import plotting
Expand Down Expand Up @@ -337,3 +340,90 @@ def library_matching(
peak_df.to_csv(lib_matched_dir / "matched_peaks.csv")

return peak_df


def map_coordinates_to_core_name(
    imz_data: ImzMLParser,
    centroid_path: Path,
    poslog_path: Path,
) -> pd.DataFrame:
    """Map each scanned coordinate on a slide to its respective core name (from the TSAI tiler).

    Args:
    ---
        imz_data (ImzMLParser): The imzML object, needed for coordinate identification.
        centroid_path (Path): A JSON file mapping each core name to its respective centroid.
            Generated by the TSAI tiler.
        poslog_path (Path): A .txt file listing all the coordinates scanned,
            needed to map coordinates to their respective core.

    Returns:
    -------
        pd.DataFrame:
            One row per scanned coordinate with the columns ``Date``, ``Time``, ``Region``,
            ``X``, ``Y`` and ``Core``. ``Core`` is NaN for regions no centroid mapped onto.

    Raises:
    ------
        ValueError:
            If the number of poslog rows differs from the number of imzML coordinates, or
            if a core's centroid does not coincide with any scanned coordinate.
    """
    validate_paths([centroid_path, poslog_path])

    coords = np.array([coord[:2] for coord in imz_data.coordinates])
    region_core_info = pd.read_csv(
        poslog_path,
        delimiter=" ",
        names=["Date", "Time", "Region", "PosX", "PosY", "X", "Y", "Z"],
        index_col=False,
        skiprows=1,
    )

    # Discard rows whose Region is "__" and keep only the needed columns. The copy is taken
    # after the final selection so the assignments below act on an independent frame.
    region_core_info = region_core_info.loc[
        region_core_info["Region"] != "__", ["Date", "Time", "Region", "X", "Y"]
    ].copy()

    # The poslog rows are paired with the imzML coordinates purely by position, so the two
    # must have the same length. Fail early with an actionable message if they do not.
    if region_core_info.shape[0] != coords.shape[0]:
        raise ValueError(
            f"The poslog file lists {region_core_info.shape[0]} scanned positions, but the "
            f"imzML data contains {coords.shape[0]} coordinates. "
            "Please verify that you've set the right poslog file."
        )

    # Keep only the part of the region name before the first "X" (e.g. "R0XY" -> "R0").
    # expand=False returns a Series, which is what a single-column assignment expects.
    region_core_info["Region"] = region_core_info["Region"].str.extract(
        r"^(.*?)(?=X)", expand=False
    )
    region_core_info[["X", "Y"]] = coords

    with open(centroid_path, "r", encoding="utf-8") as infile:
        centroid_data = json.load(infile)

    core_region_mapping = {}
    for core in centroid_data["fovs"]:
        center_point = core["centerPointPixels"]
        region_match = region_core_info.loc[
            (region_core_info["X"] == center_point["x"])
            & (region_core_info["Y"] == center_point["y"]),
            "Region",
        ]
        if region_match.shape[0] == 0:
            raise ValueError(
                f"Could not find mapping of core {core['name']} to any location on the slide, "
                "please verify that you positioned the central point of the core correctly "
                "using the TSAI tiler, or that you've set the right poslog file."
            )

        # The region containing the centroid takes on the core's name.
        core_region_mapping[region_match.values[0]] = core["name"]

    region_core_info["Core"] = region_core_info["Region"].map(core_region_mapping)
    return region_core_info


def generate_glycan_mask(
    imz_data: ImzMLParser,
    glycan_img_path: Path,
    region_core_info: pd.DataFrame,
    cores_to_crop: List[str],
):
    """Build a mask that highlights the requested cores, sized to match a glycan image.

    Args:
    ---
        imz_data (ImzMLParser): The imzML object. It is not read by this function body.
        glycan_img_path (Path): The path to the glycan image .tiff; only its shape is used.
        region_core_info (pd.DataFrame): Defines the coordinates associated with each FOV.
            The ``Core``, ``X`` and ``Y`` columns are read.
        cores_to_crop (List[str]): Which cores to segment out.

    Returns:
    -------
        np.ndarray:
            An array with the same shape as the glycan image, set to 255 at every coordinate
            belonging to one of ``cores_to_crop`` and 0 everywhere else.
    """
    validate_paths([glycan_img_path])

    mask_shape = imread(glycan_img_path).shape

    in_selected_cores = region_core_info["Core"].isin(cores_to_crop)
    xy = region_core_info.loc[in_selected_cores, ["X", "Y"]].values

    # Each X/Y value is decremented by one to form the column/row index into the mask.
    col_idx = xy[:, 0] - 1
    row_idx = xy[:, 1] - 1

    mask = np.zeros(mask_shape)
    mask[row_idx, col_idx] = 255

    return mask
106 changes: 76 additions & 30 deletions templates/maldi-pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -336,9 +336,7 @@
{
alex-l-kong marked this conversation as resolved.
Show resolved Hide resolved
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"metadata": {},
"outputs": [],
"source": [
"print(f\"Candiate Peak Count: {len(peak_candidates)}\")"
Expand Down Expand Up @@ -393,25 +391,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"peak_df, l_ips_r, r_ips_r, peak_widths_height = extraction.get_peak_widths(\n",
" total_mass_df=total_mass_df,\n",
" peak_candidate_idxs=peak_candidate_idxs,\n",
" peak_candidates=peak_candidates,\n",
" thresholds=thresholds,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"metadata": {},
"outputs": [],
"source": [
"save_peak_spectra_debug = True"
Expand Down Expand Up @@ -441,9 +421,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"metadata": {},
"outputs": [],
"source": [
"panel_df"
Expand Down Expand Up @@ -472,9 +450,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"metadata": {},
"outputs": [],
"source": [
"image_data"
Expand Down Expand Up @@ -574,12 +550,82 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Core Cropping"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After glycan matching, each core on the TMA should be appropriately named by the <a href=https://tsai.stanford.edu/research/maldi_tma/>TSAI MALDI tiler</a>. **Ensure that this step is completed before running the following section.**\n",
"\n",
"To extract FOV-level statistics, a mask will be generated to segment out individual cores on a TMA. This section first maps each acquired coordinate on the slide to its core as defined by the TSAI MALDI tiler, then generates a mask for specific cores on the TMA."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TSAI MALDI tiler output, contains name of each core mapped to respective centroid\n",
"centroid_path = \"path/to/centroids.json\"\n",
"\n",
"# contains all coordinates in the order of acquisition\n",
"poslog_path = \"path/to/poslog.txt\"\n",
"\n",
"# define path to one glycan image, needed to find dimensions of mask\n",
"glycan_img_path = \"path/to/glycan_img.tiff\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# map coordinates to core names\n",
"region_core_info = extraction.map_coordinates_to_core_name(\n",
" imz_data=imz_data,\n",
" centroid_path=centroid_path,\n",
" poslog_path=poslog_path\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# define which core(s) you want to segment out\n",
"cores_to_crop = [\"R1C1\", \"R1C2\"]\n",
"\n",
"# alternatively, uncomment the following line to crop out all cores\n",
"# cores_to_crop = region_core_info[\"Core\"].unique().tolist()\n",
"\n",
"# extract a binary mask with just the cores specified\n",
"core_cropping_mask = extraction.generate_glycan_mask(\n",
" imz_data=imz_data,\n",
" glycan_img_path=glycan_img_path,\n",
" region_core_info=region_core_info,\n",
" cores_to_crop=cores_to_crop\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# visualize the mask\n",
"_ = plt.imshow(core_cropping_mask)"
]
}
],
"metadata": {
Expand All @@ -598,7 +644,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.11.6"
},
"vscode": {
"interpreter": {
Expand Down
Loading