From a6266395593b6f4c5e611eefcbba809381b7ae45 Mon Sep 17 00:00:00 2001 From: Michelle Khoo Date: Mon, 16 Sep 2024 15:16:23 -0700 Subject: [PATCH 1/4] Fix scaling issues by using absolute cell counts --- setup.py | 2 +- stats_utils/corrector.py | 24 ++++++++++-------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/setup.py b/setup.py index 0b6286b..8ed2473 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def get_data_files(parent_dir): setup( name="stats_utils", - version="0.0.9", + version="0.0.10", description="Statistics utilities for remoscope and corresponding paper", long_description=readme(), url="https://github.com/czbiohub-sf/remo-stats-utils", diff --git a/stats_utils/corrector.py b/stats_utils/corrector.py index 69d410a..9b0fe4c 100644 --- a/stats_utils/corrector.py +++ b/stats_utils/corrector.py @@ -83,7 +83,7 @@ def _calc_parasitemia( return 0 if rbcs == 0 else parasites / rbcs - def _calc_parasitemia_rel_err( + def _calc_parasites_abs_std( self, corrected_counts: npt.NDArray, count_vars: npt.NDArray, @@ -98,13 +98,7 @@ def _calc_parasitemia_rel_err( parasites = np.sum(corrected_counts[self.parasite_ids]) parasite_count_vars = count_vars[self.parasite_ids] - - # Compute error - return ( - np.inf - if parasites == 0 - else np.sqrt(np.sum(parasite_count_vars)) / parasites - ) + return np.sqrt(np.sum(parasite_count_vars)) def _get_res_from_counts( self, raw_counts: npt.NDArray, units_ul_out: bool = False @@ -124,29 +118,31 @@ def _get_res_from_counts( # Correct counts corrected_counts = self._correct_counts(raw_counts) parasites = np.sum(corrected_counts[self.parasite_ids]) + rbcs = np.sum(corrected_counts[self.rbc_ids]) # Calc_parasitemia - parasitemia = self._calc_parasitemia(corrected_counts, parasites=parasites) + parasitemia = 100.0 * self._calc_parasitemia(corrected_counts, parasites=parasites) # Get uncertainties count_vars = self._calc_count_vars(raw_counts) # Use rule of 3 if there are no parasites if parasites == 0: - abs_bound = 100 * 3 / corrected_counts[YOGO_CLASS_IDX_MAP["healthy"]] # unit: % + parasites_95_conf_bounds = 3 / rbcs + parasitemia_95_conf_bounds = 100 * parasites_95_conf_bounds / rbcs # unit: % else: - rel_bound = 1.69 * self._calc_parasitemia_rel_err( + parasites_95_conf_bounds = 1.69 * self._calc_parasites_abs_std( corrected_counts, count_vars, parasites=parasites ) - abs_bound = rel_bound * parasitemia # unit: % + parasitemia_95_conf_bounds = 100 * parasites_95_conf_bounds / rbcs # unit: % if units_ul_out: return ( parasitemia * RBCS_P_UL, # unit: parasitemia / uL - abs_bound * RBCS_P_UL, # unit: parasitemia / uL + parasitemia_95_conf_bounds * RBCS_P_UL, # unit: parasitemia / uL ) else: - return parasitemia, abs_bound + return parasitemia, parasitemia_95_conf_bounds def _get_95_confidence_bound(self, parasitemia: float, bound: float) -> List[float]: """ From 3c77b45be20c055ac462e62e9af87b7475a8191e Mon Sep 17 00:00:00 2001 From: Michelle Khoo Date: Mon, 16 Sep 2024 16:07:52 -0700 Subject: [PATCH 2/4] Autoformat --- stats_utils/corrector.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/stats_utils/corrector.py b/stats_utils/corrector.py index 9b0fe4c..2818f08 100644 --- a/stats_utils/corrector.py +++ b/stats_utils/corrector.py @@ -121,7 +121,9 @@ def _get_res_from_counts( rbcs = np.sum(corrected_counts[self.rbc_ids]) # Calc_parasitemia - parasitemia = 100.0 * self._calc_parasitemia(corrected_counts, parasites=parasites) + parasitemia = 100.0 * self._calc_parasitemia( + corrected_counts, parasites=parasites + ) # Get uncertainties count_vars = self._calc_count_vars(raw_counts) @@ -129,17 +131,21 @@ def _get_res_from_counts( # Use rule of 3 if there are no parasites if parasites == 0: parasites_95_conf_bounds = 3 / rbcs - parasitemia_95_conf_bounds = 100 * parasites_95_conf_bounds / rbcs # unit: % + parasitemia_95_conf_bounds = ( + 100 * parasites_95_conf_bounds / rbcs + ) # unit: % else: parasites_95_conf_bounds = 1.69 * self._calc_parasites_abs_std( corrected_counts, count_vars, parasites=parasites ) - parasitemia_95_conf_bounds = 100 * parasites_95_conf_bounds / rbcs # unit: % + parasitemia_95_conf_bounds = ( + 100 * parasites_95_conf_bounds / rbcs + ) # unit: % if units_ul_out: return ( - parasitemia * RBCS_P_UL, # unit: parasitemia / uL - parasitemia_95_conf_bounds * RBCS_P_UL, # unit: parasitemia / uL + parasitemia * RBCS_P_UL, # unit: parasitemia / uL + parasitemia_95_conf_bounds * RBCS_P_UL, # unit: parasitemia / uL ) else: return parasitemia, parasitemia_95_conf_bounds From 5f097ed14fdf1c16f36d3a01edddac33877d04d9 Mon Sep 17 00:00:00 2001 From: Michelle Khoo Date: Tue, 17 Sep 2024 16:13:44 -0700 Subject: [PATCH 3/4] Remove additional scaling by 1/n --- stats_utils/corrector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats_utils/corrector.py b/stats_utils/corrector.py index 2818f08..3681148 100644 --- a/stats_utils/corrector.py +++ b/stats_utils/corrector.py @@ -130,7 +130,7 @@ def _get_res_from_counts( # Use rule of 3 if there are no parasites if parasites == 0: - parasites_95_conf_bounds = 3 / rbcs + parasites_95_conf_bounds = 3 parasitemia_95_conf_bounds = ( 100 * parasites_95_conf_bounds / rbcs ) # unit: % From 17eef3ee9d69cd007a37303e2695dc3cf000585e Mon Sep 17 00:00:00 2001 From: Michelle Khoo Date: Tue, 17 Sep 2024 16:22:37 -0700 Subject: [PATCH 4/4] Change int to float --- stats_utils/corrector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats_utils/corrector.py b/stats_utils/corrector.py index 3681148..72de816 100644 --- a/stats_utils/corrector.py +++ b/stats_utils/corrector.py @@ -130,7 +130,7 @@ def _get_res_from_counts( # Use rule of 3 if there are no parasites if parasites == 0: - parasites_95_conf_bounds = 3 + parasites_95_conf_bounds = 3.0 parasitemia_95_conf_bounds = ( 100 * parasites_95_conf_bounds / rbcs ) # unit: %