diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..29617bd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,31 @@ +name: continuous-integration + +on: [push] + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + # Requirements file generated with python=3.11 + python-version: ["3.11"] + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt # test with requirements file so can easily bump with dependabot + pip install . + + - name: Compile cython module + run: python setup.py build_ext --inplace + + - name: Test + run: | + python -m pytest tests/ diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..7e6ee06 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" # Location of your pyproject.toml or requirements.txt + schedule: + interval: "weekly" # Checks for updates every week + commit-message: + prefix: "deps" # Prefix for pull request titles + open-pull-requests-limit: 5 # Limit the number of open PRs at a time diff --git a/.gitignore b/.gitignore index f374cdc..d9a6697 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ nosetests.xml # vim *.sw[opqrs] +*~ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..00f104d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +requires = ["setuptools", "wheel", "numpy", "cython"] # Dependencies needed to build the package +build-backend = "setuptools.build_meta" + +[project] +name = "pyprophet" +version = "2.2.9" +description = "PyProphet: Semi-supervised learning and scoring of OpenSWATH results." 
+readme = { file = "README.md", content-type = "text/markdown" } +license = { text = "BSD" } +authors = [{ name = "The PyProphet Developers", email = "rocksportrocker@gmail.com" }] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Chemistry" +] +keywords = ["bioinformatics", "openSWATH", "mass spectrometry"] + +# Dependencies required for runtime +dependencies = [ + "Click", + "duckdb", + "duckdb-extensions", + "duckdb-extension-sqlite-scanner", + "numpy >= 2.0", + "scipy", + "pandas >= 0.17", + "cython", + "numexpr >= 2.10.1", + "scikit-learn >= 0.17", + "xgboost", + "hyperopt", + "statsmodels >= 0.8.0", + "matplotlib", + "tabulate", + "pyarrow", + "pypdf" +] + +# Optional dependencies +[project.optional-dependencies] +testing = ["pytest", "pytest-regtest"] + +# Define console entry points +[project.scripts] +pyprophet = "pyprophet.main:cli" + +[tool.setuptools] +packages = { find = { exclude = ["ez_setup", "examples", "tests"] } } +include-package-data = true +zip-safe = false diff --git a/pyprophet/classifiers.py b/pyprophet/classifiers.py index 750bc82..b23c96c 100644 --- a/pyprophet/classifiers.py +++ b/pyprophet/classifiers.py @@ -110,7 +110,7 @@ def objective(params): clf = xgb.XGBClassifier(random_state=42, verbosity=0, objective='binary:logitraw', eval_metric='auc', **params) - score = cross_val_score(clf, X, y, scoring='roc_auc', n_jobs=self.threads, cv=KFold(n_splits=3, shuffle=True, random_state=np.random.RandomState(42))).mean() + score = cross_val_score(clf, X, y, scoring='roc_auc', n_jobs=self.threads, cv=KFold(n_splits=3, shuffle=True, random_state=42)).mean() # click.echo("Info: AUC: {:.3f} hyperparameters: {}".format(score, params)) return score @@ -129,7 +129,8 @@ def objective(params): 
xgb_params_complexity = self.xgb_params_tuned xgb_params_complexity.update({k: self.xgb_params_space[k] for k in ('max_depth', 'min_child_weight')}) - best_complexity = fmin(fn=objective, space=xgb_params_complexity, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42)) + rng = np.random.default_rng(42) + best_complexity = fmin(fn=objective, space=xgb_params_complexity, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng) best_complexity['max_depth'] = int(best_complexity['max_depth']) best_complexity['min_child_weight'] = int(best_complexity['min_child_weight']) @@ -139,7 +140,7 @@ def objective(params): xgb_params_gamma = self.xgb_params_tuned xgb_params_gamma['gamma'] = self.xgb_params_space['gamma'] - best_gamma = fmin(fn=objective, space=xgb_params_gamma, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42)) + best_gamma = fmin(fn=objective, space=xgb_params_gamma, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng) self.xgb_params_tuned.update(best_gamma) @@ -147,7 +148,7 @@ def objective(params): xgb_params_subsampling = self.xgb_params_tuned xgb_params_subsampling.update({k: self.xgb_params_space[k] for k in ('subsample', 'colsample_bytree', 'colsample_bylevel', 'colsample_bynode')}) - best_subsampling = fmin(fn=objective, space=xgb_params_subsampling, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42)) + best_subsampling = fmin(fn=objective, space=xgb_params_subsampling, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng) self.xgb_params_tuned.update(best_subsampling) @@ -155,7 +156,7 @@ def objective(params): xgb_params_regularization = self.xgb_params_tuned xgb_params_regularization.update({k: self.xgb_params_space[k] for k in ('lambda', 'alpha')}) - best_regularization = fmin(fn=objective, 
space=xgb_params_regularization, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42)) + best_regularization = fmin(fn=objective, space=xgb_params_regularization, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng) self.xgb_params_tuned.update(best_regularization) @@ -163,7 +164,7 @@ def objective(params): xgb_params_learning = self.xgb_params_tuned xgb_params_learning['eta'] = self.xgb_params_space['eta'] - best_learning = fmin(fn=objective, space=xgb_params_learning, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42)) + best_learning = fmin(fn=objective, space=xgb_params_learning, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng) self.xgb_params_tuned.update(best_learning) click.echo("Info: Optimal hyperparameters: {}".format(self.xgb_params_tuned)) diff --git a/pyprophet/data_handling.py b/pyprophet/data_handling.py index a7c50a1..23ddd65 100644 --- a/pyprophet/data_handling.py +++ b/pyprophet/data_handling.py @@ -5,6 +5,7 @@ import sys import os import multiprocessing +from .stats import mean_and_std_dev from .optimized import find_top_ranked, rank @@ -336,6 +337,21 @@ def get_top_target_peaks(self): def get_feature_matrix(self, use_main_score): min_col = 5 if use_main_score else 6 return self.df.iloc[:, min_col:-1].values + + def normalize_score_by_decoys(self, score_col_name): + ''' + Standardize the score column in place: subtract the mean and divide by the std of the scores of the rows returned by get_top_decoy_peaks(), applying the same shift/scale to every row; raises ValueError if that std is 0 + Args: + score_col_name: str, the name of the score column + ''' + td_scores = self.get_top_decoy_peaks()[score_col_name] + mu, nu = mean_and_std_dev(td_scores) + + if nu == 0: + raise ValueError("Standard deviation of decoy scores is zero. 
Cannot normalize scores.") + + self.df.loc[:, score_col_name] = (self.df[score_col_name] - mu) / nu + def filter_(self, idx): return Experiment(self.df[idx]) @@ -344,7 +360,7 @@ def filter_(self, idx): def add_peak_group_rank(self): ids = self.df.tg_num_id.values scores = self.df.d_score.values - peak_group_ranks = rank(ids, scores) + peak_group_ranks = rank(ids, scores.astype(np.float32, copy=False)) self.df["peak_group_rank"] = peak_group_ranks @profile diff --git a/pyprophet/export_parquet.py b/pyprophet/export_parquet.py index fe0d3ed..795a12a 100644 --- a/pyprophet/export_parquet.py +++ b/pyprophet/export_parquet.py @@ -172,7 +172,7 @@ def export_to_parquet(infile, outfile, transitionLevel, onlyFeatures=False): # transition level if transitionLevel: - columns['FEATURE_TRANSITION'] = ['AREA_INTENSITY', 'TOTAL_AREA_INTENSITY', 'APEX_INTENSITY', 'TOTAL_MI'] + getVarColumnNames(condb, 'FEATURE_TRANSITION') + columns['FEATURE_TRANSITION'] = ['AREA_INTENSITY', 'TOTAL_AREA_INTENSITY', 'APEX_INTENSITY', 'TOTAL_MI'] + getVarColumnNames(con, 'FEATURE_TRANSITION') columns['TRANSITION'] = ['TRAML_ID', 'PRODUCT_MZ', 'CHARGE', 'TYPE', 'ORDINAL', 'DETECTING', 'IDENTIFYING', 'QUANTIFYING', 'LIBRARY_INTENSITY'] columns['TRANSITION_PRECURSOR_MAPPING'] = ['TRANSITION_ID'] diff --git a/pyprophet/levels_contexts.py b/pyprophet/levels_contexts.py index c7ce194..edc3602 100644 --- a/pyprophet/levels_contexts.py +++ b/pyprophet/levels_contexts.py @@ -33,7 +33,12 @@ def statistics_report(data, outfile, context, analyte, parametric, pfdr, pi0_lam outfile = outfile + "_" + str(data['run_id'].unique()[0]) # export PDF report - save_report(outfile + "_" + context + "_" + analyte + ".pdf", outfile + ": " + context + " " + analyte + "-level error-rate control", data[data.decoy==1]["score"], data[data.decoy==0]["score"], stat_table["cutoff"], stat_table["svalue"], stat_table["qvalue"], data[data.decoy==0]["p_value"], pi0, color_palette) + save_report(outfile + "_" + context + "_" + analyte + 
".pdf", + outfile + ": " + context + " " + analyte + "-level error-rate control", + data[data.decoy==1]["score"].values, data[data.decoy==0]["score"].values, stat_table["cutoff"].values, + stat_table["svalue"].values, stat_table["qvalue"].values, data[data.decoy==0]["p_value"].values, + pi0, + color_palette) return(data) @@ -184,7 +189,7 @@ def infer_proteins(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m con.close() if context == 'run-specific': - data = data.groupby('run_id').apply(statistics_report, outfile, context, "protein", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette).reset_index() + data = data.groupby('run_id').apply(statistics_report, outfile, context, "protein", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette) elif context in ['global', 'experiment-wide']: data = statistics_report(data, outfile, context, "protein", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette) @@ -257,7 +262,7 @@ def infer_peptides(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m con.close() if context == 'run-specific': - data = data.groupby('run_id').apply(statistics_report, outfile, context, "peptide", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette).reset_index() + data = data.groupby('run_id').apply(statistics_report, outfile, context, "peptide", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette) elif context in ['global', 'experiment-wide']: data = statistics_report(data, outfile, context, 
"peptide", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette) diff --git a/pyprophet/main.py b/pyprophet/main.py index b3257ea..24f97ab 100644 --- a/pyprophet/main.py +++ b/pyprophet/main.py @@ -106,6 +106,8 @@ def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fracti xgb_hyperparams = {'autotune': xgb_autotune, 'autotune_num_rounds': 10, 'num_boost_round': 100, 'early_stopping_rounds': 10, 'test_size': 0.33} xgb_params = {'eta': 0.3, 'gamma': 0, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 1, 'colsample_bytree': 1, 'colsample_bylevel': 1, 'colsample_bynode': 1, 'lambda': 1, 'alpha': 0, 'scale_pos_weight': 1, 'verbosity': 0, 'objective': 'binary:logitraw', 'nthread': 1, 'eval_metric': 'auc'} + if test: + xgb_params['tree_method'] = 'exact' xgb_params_space = {'eta': hp.uniform('eta', 0.0, 0.3), 'gamma': hp.uniform('gamma', 0.0, 0.5), 'max_depth': hp.quniform('max_depth', 2, 8, 1), 'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1), 'subsample': 1, 'colsample_bytree': 1, 'colsample_bylevel': 1, 'colsample_bynode': 1, 'lambda': hp.uniform('lambda', 0.0, 1.0), 'alpha': hp.uniform('alpha', 0.0, 1.0), 'scale_pos_weight': 1.0, 'verbosity': 0, 'objective': 'binary:logitraw', 'nthread': 1, 'eval_metric': 'auc'} diff --git a/pyprophet/semi_supervised.py b/pyprophet/semi_supervised.py index 87b56cb..7735131 100644 --- a/pyprophet/semi_supervised.py +++ b/pyprophet/semi_supervised.py @@ -3,7 +3,7 @@ from .data_handling import Experiment, update_chosen_main_score_in_table from .classifiers import AbstractLearner, XGBLearner -from .stats import mean_and_std_dev, find_cutoff +from .stats import find_cutoff try: profile @@ -64,13 +64,9 @@ def learn_randomized(self, experiment, score_columns, working_thread_number): # after semi supervised iteration: classify full dataset clf_scores = self.score(experiment, params) - mu, nu = 
mean_and_std_dev(clf_scores) experiment.set_and_rerank("classifier_score", clf_scores) - td_scores = experiment.get_top_decoy_peaks()["classifier_score"] - - mu, nu = mean_and_std_dev(td_scores) - experiment["classifier_score"] = (experiment["classifier_score"] - mu) / nu + experiment.normalize_score_by_decoys('classifier_score') experiment.rank_by("classifier_score") top_test_peaks = experiment.get_top_test_peaks() @@ -92,13 +88,9 @@ def learn_final(self, experiment): # after semi supervised iteration: classify full dataset clf_scores = self.score(experiment, params) - mu, nu = mean_and_std_dev(clf_scores) experiment.set_and_rerank("classifier_score", clf_scores) - td_scores = experiment.get_top_decoy_peaks()["classifier_score"] - - mu, nu = mean_and_std_dev(td_scores) - experiment["classifier_score"] = (experiment["classifier_score"] - mu) / nu + experiment.normalize_score_by_decoys('classifier_score') experiment.rank_by("classifier_score") return params diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3129b63 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,118 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --all-extras --output-file=requirements.txt +# +click==8.1.7 + # via pyprophet (setup.py) +cloudpickle==3.1.0 + # via hyperopt +contourpy==1.3.0 + # via matplotlib +cycler==0.12.1 + # via matplotlib +cython==3.0.11 + # via pyprophet (setup.py) +duckdb==1.1.3 + # via + # duckdb-extension-sqlite-scanner + # duckdb-extensions + # pyprophet (setup.py) +duckdb-extension-sqlite-scanner==1.1.3 + # via pyprophet (setup.py) +duckdb-extensions==1.1.3 + # via pyprophet (setup.py) +fonttools==4.55.0 + # via matplotlib +future==1.0.0 + # via hyperopt +hyperopt==0.2.7 + # via pyprophet (setup.py) +iniconfig==2.0.0 + # via pytest +joblib==1.4.2 + # via scikit-learn +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.9.2 + # via pyprophet (setup.py) +networkx==3.2.1 + # 
via hyperopt +numexpr==2.10.1 + # via pyprophet (setup.py) +numpy==2.0.2 + # via + # contourpy + # hyperopt + # matplotlib + # numexpr + # pandas + # patsy + # pyprophet (setup.py) + # scikit-learn + # scipy + # statsmodels + # xgboost +nvidia-nccl-cu12==2.23.4 + # via xgboost +packaging==24.2 + # via + # matplotlib + # pytest + # statsmodels +pandas==2.2.3 + # via + # pyprophet (setup.py) + # statsmodels +patsy==1.0.1 + # via statsmodels +pillow==11.0.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +py4j==0.10.9.7 + # via hyperopt +pyarrow==18.0.0 + # via pyprophet (setup.py) +pyparsing==3.2.0 + # via matplotlib +pypdf==5.1.0 + # via pyprophet (setup.py) +pytest==8.3.3 + # via + # pyprophet (setup.py) + # pytest-regtest +pytest-regtest==2.3.3 + # via pyprophet (setup.py) +python-dateutil==2.9.0.post0 + # via + # matplotlib + # pandas +pytz==2024.2 + # via pandas +scikit-learn==1.5.2 + # via pyprophet (setup.py) +scipy==1.13.1 + # via + # hyperopt + # pyprophet (setup.py) + # scikit-learn + # statsmodels + # xgboost +six==1.16.0 + # via + # hyperopt + # python-dateutil +statsmodels==0.14.4 + # via pyprophet (setup.py) +tabulate==0.9.0 + # via pyprophet (setup.py) +threadpoolctl==3.5.0 + # via scikit-learn +tqdm==4.67.0 + # via hyperopt +tzdata==2024.2 + # via pandas +xgboost==2.1.2 + # via pyprophet (setup.py) diff --git a/setup.py b/setup.py index 2c14525..95ea8a3 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,5 @@ -import sys +from setuptools import setup, Extension, find_packages import numpy -from setuptools import setup, find_packages -from distutils.extension import Extension try: from Cython.Build import cythonize @@ -10,66 +8,11 @@ else: use_cython = True -cmdclass = {} ext_modules = [] - if use_cython: ext_modules += [Extension("pyprophet._optimized", ["pyprophet/_optimized.pyx"])] ext_modules = cythonize(ext_modules) else: ext_modules += [Extension("pyprophet._optimized", ["pyprophet/_optimized.c"])] -# read the 
contents of README for PyPI -from os import path -this_directory = path.abspath(path.dirname(__file__)) -with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -setup(name='pyprophet', - version="2.2.9", - author="The PyProphet Developers", - author_email="rocksportrocker@gmail.com", - description="PyProphet: Semi-supervised learning and scoring of OpenSWATH results.", - long_description=long_description, - long_description_content_type='text/markdown', - license="BSD", - url="https://github.com/PyProphet/pyprophet", - packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), - include_package_data=True, - include_dirs=[numpy.get_include()], - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Environment :: Console', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Topic :: Scientific/Engineering :: Chemistry', - ], - zip_safe=False, - install_requires=[ - "Click", - "duckdb", - "duckdb-extensions", - "duckdb-extension-sqlite-scanner", - "numpy >= 1.9.0", - "scipy", - "pandas >= 0.17", - "cython", - "numexpr >= 2.10.1", - "scikit-learn >= 0.17", - "xgboost", - "hyperopt", - "statsmodels >= 0.8.0", - "matplotlib", - "tabulate", - "pyarrow", - "pypdf" - ], - entry_points={ - 'console_scripts': [ - "pyprophet=pyprophet.main:cli", - ] - }, - ext_modules=ext_modules, - ) +setup(name='pyprophet', ext_modules=ext_modules, include_dirs=[numpy.get_include()]) \ No newline at end of file diff --git a/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out b/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out index 8f458fc..bde61a3 100644 --- a/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out +++ b/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out @@ -1,14 +1,14 @@ feature_id ms1_precursor_pep ms2_peakgroup_pep ms2_precursor_pep -0 
-4409520928686189639 0.0045 0.0045 0.1757 -1 -7771919224870429764 0.0045 0.0045 0.1757 -2 -797725006165535344 0.0045 0.0045 0.1291 -3 -1732939685941081620 0.0045 0.0045 0.1757 -4 -6747816958328369759 0.0045 0.0045 0.1757 +0 -4409520928686189639 0.0008 0.0024 0.8813 +1 -7771919224870429764 0.0008 0.0024 0.8813 +2 -797725006165535344 0.0008 0.0024 0.0883 +3 -1732939685941081620 0.0008 0.0024 0.8611 +4 -6747816958328369759 0.0008 0.0024 0.8813 .. ... ... ... ... -95 237580321205345393 0.0045 0.0045 0.1291 -96 5416940836005312912 0.0045 0.0045 0.1291 -97 -7541234528799769804 0.0045 0.0045 0.1757 -98 8036548921756545335 0.0045 0.0045 0.1291 -99 -6558503086717676095 0.0045 0.0045 0.1291 +95 -6034887541083502974 0.0008 0.0024 0.8461 +96 483971408708572459 0.0008 0.0024 0.0883 +97 5086440667566053402 0.0008 0.0024 0.9278 +98 7291105701317857435 0.0008 0.0024 0.8813 +99 237580321205345393 0.0008 0.0024 0.8461 [100 rows x 4 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out b/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out index e8d7f7c..8b58143 100644 --- a/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out +++ b/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out @@ -1,14 +1,14 @@ feature_id ms1_precursor_pep ms2_peakgroup_pep ms2_precursor_pep -0 -4409520928686189639 0.0045 0.0043 0.6924 -1 -7771919224870429764 0.0045 0.0043 0.6924 -2 -797725006165535344 0.0045 0.0043 0.3053 -3 -1732939685941081620 0.0045 0.0043 0.6924 -4 -6747816958328369759 0.0045 0.0043 0.6924 +0 -4409520928686189639 0.0008 0.004 0.2748 +1 -7771919224870429764 0.0008 0.004 0.3580 +2 -797725006165535344 0.0008 0.004 0.2370 +3 -1732939685941081620 0.0008 0.004 0.2748 +4 -6747816958328369759 0.0008 0.004 0.3580 .. ... ... ... ... 
-95 -5977524328878179832 0.0045 0.0043 0.6924 -96 -6034887541083502974 0.0045 0.0043 0.6924 -97 483971408708572459 0.0045 0.0043 0.1735 -98 5086440667566053402 0.0045 0.0043 0.6924 -99 7291105701317857435 0.0045 0.0043 0.6924 +95 8943629340769664660 0.0008 0.004 0.2370 +96 -6034887541083502974 0.0008 0.004 0.2748 +97 483971408708572459 0.0008 0.004 0.2370 +98 5086440667566053402 0.0008 0.004 0.3580 +99 7291105701317857435 0.0008 0.004 0.2748 [100 rows x 4 columns] diff --git a/tests/test_pyprophet_export_parquet.py b/tests/test_pyprophet_export_parquet.py index b4f3dba..6c01696 100644 --- a/tests/test_pyprophet_export_parquet.py +++ b/tests/test_pyprophet_export_parquet.py @@ -27,14 +27,14 @@ def _run_cmdline(cmdline): return stdout -def _run_export_parquet_single_run(temp_folder, transitionLevel=False, threads=1, chunksize=1000, pd_testing_kwargs=dict(check_dtype=False, check_names=False), onlyFeatures=False): +def _run_export_parquet_single_run(temp_folder, transitionLevel=False, pd_testing_kwargs=dict(check_dtype=False, check_names=False), onlyFeatures=False): os.chdir(temp_folder) DATA_NAME="dummyOSWScoredData.osw" data_path = os.path.join(DATA_FOLDER, DATA_NAME) conn = sqlite3.connect(DATA_NAME) shutil.copy(data_path, temp_folder) - cmdline = "pyprophet export-parquet --in={} --threads={} --chunksize={}".format(DATA_NAME, threads, chunksize) + cmdline = "pyprophet export-parquet --in={}".format(DATA_NAME) # if testing transition level add --transitionLevel flag if transitionLevel: @@ -112,19 +112,4 @@ def test_export_parquet_single_run_onlyFeatures(tmpdir): def test_export_parquet_single_run_transitionLevel_onlyFeatures(tmpdir): - _run_export_parquet_single_run(tmpdir, transitionLevel=True, onlyFeatures=True) - - -def test_multithread_export_parquet_single_run(tmpdir): - _run_export_parquet_single_run(tmpdir, transitionLevel=False, threads=2, chunksize=2) - -def test_multithread_export_parquet_single_run_transitionLevel(tmpdir): - 
_run_export_parquet_single_run(tmpdir, transitionLevel=True, threads=2, chunksize=2) - - -def test_multithread_export_parquet_single_run_onlyFeatures(tmpdir): - _run_export_parquet_single_run(tmpdir, onlyFeatures=True, threads=2, chunksize=4) - - -def test_multithread_export_parquet_single_run_transitionLevel_onlyFeatures(tmpdir): - _run_export_parquet_single_run(tmpdir, transitionLevel=True, onlyFeatures=True, threads=2, chunksize=4) + _run_export_parquet_single_run(tmpdir, transitionLevel=True, onlyFeatures=True) \ No newline at end of file