From 60709ec166f6430af708eb3ada61106c8f7f66c1 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 07:44:36 +0100 Subject: [PATCH 1/7] Update openms.py Small patch https://github.com/bigbio/sdrf-pipelines/issues/164 --- sdrf_pipelines/openms/openms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdrf_pipelines/openms/openms.py b/sdrf_pipelines/openms/openms.py index 59d02e16..45be9287 100644 --- a/sdrf_pipelines/openms/openms.py +++ b/sdrf_pipelines/openms/openms.py @@ -127,6 +127,7 @@ def __init__(self) -> None: "Glutamyl endopeptidase": "glutamyl endopeptidase", "Trypsin/p": "Trypsin/P", "Lys-c": "Lys-C", + "Lys-c/p": "Lys-C/P", "Lys-n": "Lys-N", "Arg-c": "Arg-C", "Arg-c/p": "Arg-C/P", From b8d01eaa81edf3f631b636ab83118320767ab058 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 07:52:48 +0100 Subject: [PATCH 2/7] remove unused imports --- sdrf_pipelines/sdrf/sdrf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdrf_pipelines/sdrf/sdrf.py b/sdrf_pipelines/sdrf/sdrf.py index f4f34abc..52816c8a 100644 --- a/sdrf_pipelines/sdrf/sdrf.py +++ b/sdrf_pipelines/sdrf/sdrf.py @@ -3,8 +3,6 @@ import logging import pandas as pd -from pandas import DataFrame -from pandas._typing import PythonFuncType from sdrf_pipelines.sdrf.sdrf_schema import CELL_LINES_TEMPLATE from sdrf_pipelines.sdrf.sdrf_schema import HUMAN_TEMPLATE From 029bbc83245d2b917349438abe2b48cf1247402b Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 07:59:35 +0100 Subject: [PATCH 3/7] small changes --- sdrf_pipelines/maxquant/maxquant.py | 45 +++++++++++---------- sdrf_pipelines/normalyzerde/normalyzerde.py | 5 ++- sdrf_pipelines/openms/openms.py | 7 ++-- sdrf_pipelines/zooma/ols.py | 2 +- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/sdrf_pipelines/maxquant/maxquant.py b/sdrf_pipelines/maxquant/maxquant.py index b756bb4e..278c4214 100644 --- a/sdrf_pipelines/maxquant/maxquant.py +++ b/sdrf_pipelines/maxquant/maxquant.py @@ -709,17 +709,17 @@ def maxquant_ify_mods(self, sdrf_mods, mqconfdir): return ",".join(oms_mods) def maxquant_convert( - self, - sdrf_file, - fastaFilePath, - mqconfdir, - matchBetweenRuns, - peptideFDR, - proteinFDR, - tempFolder, - raw_Folder, - numThreads, - output_path, + self, + sdrf_file, + fastaFilePath, + mqconfdir, + matchBetweenRuns, + peptideFDR, + proteinFDR, + tempFolder, + raw_Folder, + numThreads, + output_path, ): print("PROCESSING: " + sdrf_file + '"') @@ -1695,7 +1695,8 @@ def maxquant_convert( referenceChannel = doc.createElement("referenceChannel") for key1, instr_val in file2instrument.items(): value2 = ( - str(file2enzyme[key1]) + file2label[key1] + str(file2mods[key1]) + str(file2pctol) + str(file2fragtol) + str(file2enzyme[key1]) + file2label[key1] + str(file2mods[key1]) + str(file2pctol) + str( + file2fragtol) ) datanalysisparams = {} for p in file2params.keys(): @@ -1718,10 +1719,10 @@ def maxquant_convert( "datanalysisparams": datanalysisparams, } if ( - "Lys8" in file2label[key1] - or "Arg10" in file2label[key1] - or "Arg6" in file2label[key1] - or "Lys6" in file2label[key1] + "Lys8" in file2label[key1] + or "Arg10" in file2label[key1] + or "Arg6" in file2label[key1] + or "Lys6" in file2label[key1] ): parameterGroup["0"]["silac_shape"] = file2silac_shape[key1] @@ -1749,10 +1750,10 @@ def maxquant_convert( } if ( - "Lys8" in file2label[key1] - or "Arg10" in file2label[key1] - or "Arg6" in file2label[key1] - or "Lys6" in file2label[key1] + "Lys8" in file2label[key1] + or "Arg10" in file2label[key1] + or "Arg6" in file2label[key1] + or "Lys6" in file2label[key1] ): parameterGroup[str(tag)]["silac_shape"] = file2silac_shape[key1] @@ -1925,8 +1926,8 @@ def maxquant_convert( if "Lys8" in j["label"] or "Arg10" in j["label"] or "Arg6" in j["label"] or "Lys6" in j["label"]: for lm in range(j["silac_shape"][0]): r = j["label"].split(",")[ - lm * j["silac_shape"][1] : lm * j["silac_shape"][1] + lm * j["silac_shape"][1] - ] + lm * j["silac_shape"][1]: lm * j["silac_shape"][1] + lm * j["silac_shape"][1] + ] if "Arg0" in r: r.remove("Arg0") text = ";".join(r) diff --git a/sdrf_pipelines/normalyzerde/normalyzerde.py b/sdrf_pipelines/normalyzerde/normalyzerde.py index 4e6c876a..892bf337 100644 --- a/sdrf_pipelines/normalyzerde/normalyzerde.py +++ b/sdrf_pipelines/normalyzerde/normalyzerde.py @@ -6,6 +6,7 @@ import pandas as pd + # Based on msstats class # example: parse_sdrf convert-normalyzerde -s ./testdata/PXD000288.sdrf.tsv -o ./normalyzer_design.tsv @@ -18,14 +19,14 @@ def __init__(self) -> None: # Consider unlabeled analysis for now def convert_normalyzerde_design( - self, sdrf_file, split_by_columns, annotation_path, comparisons_path, maxquant_exp_design_file + self, sdrf_file, split_by_columns, annotation_path, comparisons_path, maxquant_exp_design_file ): sdrf = pd.read_csv(sdrf_file, sep="\t") sdrf = sdrf.astype(str) sdrf.columns = map(str.lower, sdrf.columns) # convert column names to lower-case data = {} condition = [] - replicates = [] + [] runs = sdrf["comment[data file]"].tolist() source_names = sdrf["source name"].tolist() diff --git a/sdrf_pipelines/openms/openms.py b/sdrf_pipelines/openms/openms.py index 45be9287..18b9223c 100644 --- a/sdrf_pipelines/openms/openms.py +++ b/sdrf_pipelines/openms/openms.py @@ -126,6 +126,7 @@ def __init__(self) -> None: self.enzymes = { "Glutamyl endopeptidase": "glutamyl endopeptidase", "Trypsin/p": "Trypsin/P", + "Trypchymo": "TrypChymo", "Lys-c": "Lys-C", "Lys-c/p": "Lys-C/P", "Lys-n": "Lys-N", @@ -425,7 +426,7 @@ def openms_convert( if not split_by_columns: # output of search settings for every row in sdrf - self.save_search_settings_to_file("openms.tsv", sdrf, f2c, extension_convert=extension_convert) + self.save_search_settings_to_file("openms.tsv", sdrf, f2c) # output one experimental design file if one_table: @@ -459,7 +460,7 @@ def openms_convert( # extract rows from sdrf for current condition split_sdrf = sdrf.loc[sdrf["_conditions_from_factors"] == c] output_filename = "openms.tsv." + str(index) - self.save_search_settings_to_file(output_filename, split_sdrf, f2c, extension_convert=extension_convert) + self.save_search_settings_to_file(output_filename, split_sdrf, f2c) # output of experimental design output_filename = "experimental_design.tsv." + str(index) @@ -977,7 +978,7 @@ def writeOneTableExperimentalDesign( with open(output_filename, "w+") as of: of.write(f) - def save_search_settings_to_file(self, output_filename, sdrf, f2c, extension_convert): + def save_search_settings_to_file(self, output_filename, sdrf, f2c): f = "" open_ms_search_settings_header = [ "URI", diff --git a/sdrf_pipelines/zooma/ols.py b/sdrf_pipelines/zooma/ols.py index ace77e15..a19a890e 100644 --- a/sdrf_pipelines/zooma/ols.py +++ b/sdrf_pipelines/zooma/ols.py @@ -198,7 +198,7 @@ def search( bytype=bytype, rows=rows, num_retries=num_retries, - start=(rows + (start)), + start=(rows + start), ) ) return docs_found From c1a30331c60a4c968c2a66454b579ec3bf33cbf8 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 08:00:26 +0100 Subject: [PATCH 4/7] small changes --- sdrf_pipelines/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdrf_pipelines/__init__.py b/sdrf_pipelines/__init__.py index 5a6b518d..15331024 100644 --- a/sdrf_pipelines/__init__.py +++ b/sdrf_pipelines/__init__.py @@ -1 +1 @@ -__version__ = "0.0.25" +__version__ = "0.0.26" From 09550bb92b5fbd416f9d6cfe53e71c06048111b9 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 08:05:46 +0100 Subject: [PATCH 5/7] small changes isort+black --- sdrf_pipelines/normalyzerde/normalyzerde.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdrf_pipelines/normalyzerde/normalyzerde.py b/sdrf_pipelines/normalyzerde/normalyzerde.py index 892bf337..c52c85e6 100644 --- a/sdrf_pipelines/normalyzerde/normalyzerde.py +++ b/sdrf_pipelines/normalyzerde/normalyzerde.py @@ -6,7 +6,6 @@ import pandas as pd - # Based on msstats class # example: parse_sdrf convert-normalyzerde -s ./testdata/PXD000288.sdrf.tsv -o ./normalyzer_design.tsv @@ -19,7 +18,7 @@ def __init__(self) -> None: # Consider unlabeled analysis for now def convert_normalyzerde_design( - self, sdrf_file, split_by_columns, annotation_path, comparisons_path, maxquant_exp_design_file + self, sdrf_file, split_by_columns, annotation_path, comparisons_path, maxquant_exp_design_file ): sdrf = pd.read_csv(sdrf_file, sep="\t") sdrf = sdrf.astype(str) From c03a75932e045c0e399423ecbeb3580c326f11f3 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 08:08:25 +0100 Subject: [PATCH 6/7] small changes isort+black --- sdrf_pipelines/maxquant/maxquant.py | 45 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/sdrf_pipelines/maxquant/maxquant.py b/sdrf_pipelines/maxquant/maxquant.py index 278c4214..b756bb4e 100644 --- a/sdrf_pipelines/maxquant/maxquant.py +++ b/sdrf_pipelines/maxquant/maxquant.py @@ -709,17 +709,17 @@ def maxquant_ify_mods(self, sdrf_mods, mqconfdir): return ",".join(oms_mods) def maxquant_convert( - self, - sdrf_file, - fastaFilePath, - mqconfdir, - matchBetweenRuns, - peptideFDR, - proteinFDR, - tempFolder, - raw_Folder, - numThreads, - output_path, + self, + sdrf_file, + fastaFilePath, + mqconfdir, + matchBetweenRuns, + peptideFDR, + proteinFDR, + tempFolder, + raw_Folder, + numThreads, + output_path, ): print("PROCESSING: " + sdrf_file + '"') @@ -1695,8 +1695,7 @@ def maxquant_convert( referenceChannel = doc.createElement("referenceChannel") for key1, instr_val in file2instrument.items(): value2 = ( - str(file2enzyme[key1]) + file2label[key1] + str(file2mods[key1]) + str(file2pctol) + str( - file2fragtol) + str(file2enzyme[key1]) + file2label[key1] + str(file2mods[key1]) + str(file2pctol) + str(file2fragtol) ) datanalysisparams = {} for p in file2params.keys(): @@ -1719,10 +1718,10 @@ def maxquant_convert( "datanalysisparams": datanalysisparams, } if ( - "Lys8" in file2label[key1] - or "Arg10" in file2label[key1] - or "Arg6" in file2label[key1] - or "Lys6" in file2label[key1] + "Lys8" in file2label[key1] + or "Arg10" in file2label[key1] + or "Arg6" in file2label[key1] + or "Lys6" in file2label[key1] ): parameterGroup["0"]["silac_shape"] = file2silac_shape[key1] @@ -1750,10 +1749,10 @@ def maxquant_convert( } if ( - "Lys8" in file2label[key1] - or "Arg10" in file2label[key1] - or "Arg6" in file2label[key1] - or "Lys6" in file2label[key1] + "Lys8" in file2label[key1] + or "Arg10" in file2label[key1] + or "Arg6" in file2label[key1] + or "Lys6" in file2label[key1] ): parameterGroup[str(tag)]["silac_shape"] = file2silac_shape[key1] @@ -1926,8 +1925,8 @@ def maxquant_convert( if "Lys8" in j["label"] or "Arg10" in j["label"] or "Arg6" in j["label"] or "Lys6" in j["label"]: for lm in range(j["silac_shape"][0]): r = j["label"].split(",")[ - lm * j["silac_shape"][1]: lm * j["silac_shape"][1] + lm * j["silac_shape"][1] - ] + lm * j["silac_shape"][1] : lm * j["silac_shape"][1] + lm * j["silac_shape"][1] + ] if "Arg0" in r: r.remove("Arg0") text = ";".join(r) From e9f50fe4dac7d21b6895576273807c56a77db611 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Sat, 6 Apr 2024 08:11:06 +0100 Subject: [PATCH 7/7] small changes isort+black --- sdrf_pipelines/normalyzerde/normalyzerde.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdrf_pipelines/normalyzerde/normalyzerde.py b/sdrf_pipelines/normalyzerde/normalyzerde.py index c52c85e6..a01a8b6f 100644 --- a/sdrf_pipelines/normalyzerde/normalyzerde.py +++ b/sdrf_pipelines/normalyzerde/normalyzerde.py @@ -25,7 +25,6 @@ def convert_normalyzerde_design( sdrf.columns = map(str.lower, sdrf.columns) # convert column names to lower-case data = {} condition = [] - [] runs = sdrf["comment[data file]"].tolist() source_names = sdrf["source name"].tolist()