Skip to content

Commit

Permalink
filter out parameter files
Browse files Browse the repository at this point in the history
  • Loading branch information
bmeluch committed Feb 13, 2025
1 parent 284f80a commit 116bad3
Showing 1 changed file with 55 additions and 24 deletions.
79 changes: 55 additions & 24 deletions NOM_visualizations/python/nom_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -388,12 +388,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Find the raw data objects used as input for these analysis records by matching the analysis record's `has_input` slot to the `id` slot in the collection `data_object_set`."
"Find the raw data objects used as input for these analysis records by matching the analysis record's `has_input` slot to the `id` slot in the collection `data_object_set`. \n",
"\n",
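"Workflows can take multiple inputs (e.g., configuration or parameter files) in addition to the raw data, so we will filter out data objects typed as `Analysis Tool Parameter File` and keep the rest, including any objects with no `data_object_type` in case raw data files are missing a label."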
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 24,
"metadata": {},
"outputs": [
{
Expand All @@ -417,89 +419,114 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>data_object_type</th>\n",
" <th>raw_id</th>\n",
" <th>raw_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Direct Infusion FT ICR-MS Raw Data</td>\n",
" <td>nmdc:dobj-11-04embv91</td>\n",
" <td>Lybrand_FT_62_W_23Aug19_Alder_Infuse_p3_1_01_4...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Direct Infusion FT ICR-MS Raw Data</td>\n",
" <td>nmdc:dobj-11-04ny1n21</td>\n",
" <td>Lybrand_FT_36_C_30Aug19_Alder_Infuse_p05_1_01_...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Direct Infusion FT ICR-MS Raw Data</td>\n",
" <td>nmdc:dobj-11-09p17z03</td>\n",
" <td>Lybrand_Permafrost_BOG_14_CHCl3_13Dec19_Alder_...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Direct Infusion FT ICR-MS Raw Data</td>\n",
" <td>nmdc:dobj-11-0cmhqk17</td>\n",
" <td>WHONDRS_S19S_0059_ICR_1_43_Alder_Inf_13Sept19_...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Direct Infusion FT ICR-MS Raw Data</td>\n",
" <td>nmdc:dobj-11-0rgvyp97</td>\n",
" <td>WHONDRS_S19S_R33_14Sept2020_Alder_Infuse_p15_1...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2614</th>\n",
" <th>2578</th>\n",
" <td>NaN</td>\n",
" <td>nmdc:dobj-13-va7e5m75</td>\n",
" <td>output: Brodie_150_H2O_11Mar19_R2_HESI_Neg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2615</th>\n",
" <th>2579</th>\n",
" <td>NaN</td>\n",
" <td>nmdc:dobj-13-w9czqg70</td>\n",
" <td>output: Brodie_118_H2O_14Mar19_R3_HESI_Neg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2616</th>\n",
" <th>2580</th>\n",
" <td>NaN</td>\n",
" <td>nmdc:dobj-13-wn3wd765</td>\n",
" <td>output: Brodie_116_H2O_12Mar19_R2_HESI_Neg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2617</th>\n",
" <th>2581</th>\n",
" <td>NaN</td>\n",
" <td>nmdc:dobj-13-xrxe2y20</td>\n",
" <td>output: Brodie_118_MeOH_R2_21Mar19_HESI_Neg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2618</th>\n",
" <th>2582</th>\n",
" <td>NaN</td>\n",
" <td>nmdc:dobj-13-ya146a62</td>\n",
" <td>output: Brodie_120_MeOH_R2_21Mar19_HESI_Neg</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2619 rows × 2 columns</p>\n",
"<p>2583 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" raw_id raw_name\n",
"0 nmdc:dobj-11-04embv91 Lybrand_FT_62_W_23Aug19_Alder_Infuse_p3_1_01_4...\n",
"1 nmdc:dobj-11-04ny1n21 Lybrand_FT_36_C_30Aug19_Alder_Infuse_p05_1_01_...\n",
"2 nmdc:dobj-11-09p17z03 Lybrand_Permafrost_BOG_14_CHCl3_13Dec19_Alder_...\n",
"3 nmdc:dobj-11-0cmhqk17 WHONDRS_S19S_0059_ICR_1_43_Alder_Inf_13Sept19_...\n",
"4 nmdc:dobj-11-0rgvyp97 WHONDRS_S19S_R33_14Sept2020_Alder_Infuse_p15_1...\n",
"... ... ...\n",
"2614 nmdc:dobj-13-va7e5m75 output: Brodie_150_H2O_11Mar19_R2_HESI_Neg\n",
"2615 nmdc:dobj-13-w9czqg70 output: Brodie_118_H2O_14Mar19_R3_HESI_Neg\n",
"2616 nmdc:dobj-13-wn3wd765 output: Brodie_116_H2O_12Mar19_R2_HESI_Neg\n",
"2617 nmdc:dobj-13-xrxe2y20 output: Brodie_118_MeOH_R2_21Mar19_HESI_Neg\n",
"2618 nmdc:dobj-13-ya146a62 output: Brodie_120_MeOH_R2_21Mar19_HESI_Neg\n",
"\n",
"[2619 rows x 2 columns]"
" data_object_type raw_id \\\n",
"0 Direct Infusion FT ICR-MS Raw Data nmdc:dobj-11-04embv91 \n",
"1 Direct Infusion FT ICR-MS Raw Data nmdc:dobj-11-04ny1n21 \n",
"2 Direct Infusion FT ICR-MS Raw Data nmdc:dobj-11-09p17z03 \n",
"3 Direct Infusion FT ICR-MS Raw Data nmdc:dobj-11-0cmhqk17 \n",
"4 Direct Infusion FT ICR-MS Raw Data nmdc:dobj-11-0rgvyp97 \n",
"... ... ... \n",
"2578 NaN nmdc:dobj-13-va7e5m75 \n",
"2579 NaN nmdc:dobj-13-w9czqg70 \n",
"2580 NaN nmdc:dobj-13-wn3wd765 \n",
"2581 NaN nmdc:dobj-13-xrxe2y20 \n",
"2582 NaN nmdc:dobj-13-ya146a62 \n",
"\n",
" raw_name \n",
"0 Lybrand_FT_62_W_23Aug19_Alder_Infuse_p3_1_01_4... \n",
"1 Lybrand_FT_36_C_30Aug19_Alder_Infuse_p05_1_01_... \n",
"2 Lybrand_Permafrost_BOG_14_CHCl3_13Dec19_Alder_... \n",
"3 WHONDRS_S19S_0059_ICR_1_43_Alder_Inf_13Sept19_... \n",
"4 WHONDRS_S19S_R33_14Sept2020_Alder_Infuse_p15_1... \n",
"... ... \n",
"2578 output: Brodie_150_H2O_11Mar19_R2_HESI_Neg \n",
"2579 output: Brodie_118_H2O_14Mar19_R3_HESI_Neg \n",
"2580 output: Brodie_116_H2O_12Mar19_R2_HESI_Neg \n",
"2581 output: Brodie_118_MeOH_R2_21Mar19_HESI_Neg \n",
"2582 output: Brodie_120_MeOH_R2_21Mar19_HESI_Neg \n",
"\n",
"[2583 rows x 3 columns]"
]
},
"execution_count": 5,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -510,12 +537,16 @@
" id_field=\"analysis_has_input\",\\\n",
" query_collection=\"data_object_set\",\\\n",
" match_id_field=\"id\",\\\n",
" query_fields=\"id,name\")\n",
" query_fields=\"id,name,data_object_type\")\n",
"# clarify names\n",
"for dataobject in raw_dataobj:\n",
" dataobject[\"raw_id\"] = dataobject.pop(\"id\")\n",
" dataobject[\"raw_name\"] = dataobject.pop(\"name\")\n",
"\n",
"# Filter out parameter files (leave NAs in case raw data files are missing a label)\n",
"param_dataobj = [file for file in raw_dataobj if 'data_object_type' in file and file['data_object_type'] == 'Analysis Tool Parameter File']\n",
"raw_dataobj = [file for file in raw_dataobj if file not in param_dataobj]\n",
"\n",
"raw_df = func.convert_df(raw_dataobj)\n",
"\n",
"raw_df"
Expand Down

0 comments on commit 116bad3

Please sign in to comment.