-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
54 changed files
with
2,024 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
name: math637 | ||
channels: | ||
- conda-forge | ||
- defaults | ||
dependencies: | ||
- appnope=0.1.2=py38h50d1736_2 | ||
- argon2-cffi=20.1.0=py38h9ed2024_1 | ||
- asttokens=2.0.5=pyhd8ed1ab_0 | ||
- attrs=21.4.0=pyhd3eb1b0_0 | ||
- backcall=0.2.0=pyh9f0ad1d_0 | ||
- backports=1.0=py_2 | ||
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 | ||
- black=22.1.0=pyhd8ed1ab_0 | ||
- blas=1.0=mkl | ||
- bleach=4.1.0=pyhd3eb1b0_0 | ||
- bottleneck=1.3.2=py38hf1fa96c_1 | ||
- brotli=1.0.9=hb1e8313_2 | ||
- brotlipy=0.7.0=py38h96a0964_1003 | ||
- ca-certificates=2022.3.29=hecd8cb5_1 | ||
- certifi=2021.10.8=py38hecd8cb5_2 | ||
- cffi=1.15.0=py38hc55c11b_1 | ||
- charset-normalizer=2.0.11=pyhd8ed1ab_0 | ||
- click=8.0.3=py38h50d1736_1 | ||
- cryptography=36.0.1=py38h56c4533_0 | ||
- cycler=0.11.0=pyhd3eb1b0_0 | ||
- dataclasses=0.8=pyhc8e2a94_3 | ||
- debugpy=1.5.1=py38ha048514_0 | ||
- decorator=5.1.1=pyhd8ed1ab_0 | ||
- defusedxml=0.7.1=pyhd3eb1b0_0 | ||
- docopt=0.6.2=py_1 | ||
- entrypoints=0.4=pyhd8ed1ab_0 | ||
- executing=0.8.2=pyhd8ed1ab_0 | ||
- fonttools=4.25.0=pyhd3eb1b0_0 | ||
- freetype=2.11.0=hd8bbffd_0 | ||
- giflib=5.2.1=haf1e3a3_0 | ||
- greenlet=1.1.1=py38h23ab428_0 | ||
- idna=3.3=pyhd8ed1ab_0 | ||
- importlib-metadata=4.8.2=py38hecd8cb5_0 | ||
- importlib_metadata=4.8.2=hd3eb1b0_0 | ||
- intel-openmp=2021.4.0=hecd8cb5_3538 | ||
- ipykernel=6.9.0=py38h5fd9f69_0 | ||
- ipympl=0.8.7=pyhd3eb1b0_0 | ||
- ipython=8.0.1=py38h50d1736_0 | ||
- ipython_genutils=0.2.0=pyhd3eb1b0_1 | ||
- ipywidgets=7.6.5=pyhd3eb1b0_1 | ||
- jedi=0.18.1=py38h50d1736_0 | ||
- jinja2=3.0.2=pyhd3eb1b0_0 | ||
- jpeg=9d=h9ed2024_0 | ||
- jsonschema=3.2.0=pyhd3eb1b0_2 | ||
- jupyter_client=7.1.2=pyhd8ed1ab_0 | ||
- jupyter_core=4.9.1=py38h50d1736_1 | ||
- jupyterlab_pygments=0.1.2=py_0 | ||
- jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 | ||
- kiwisolver=1.3.1=py38h23ab428_0 | ||
- lcms2=2.12=hf1fd2bf_0 | ||
- libcxx=12.0.0=h2f01273_0 | ||
- libffi=3.3=hb1e8313_2 | ||
- libgfortran=3.0.1=h93005f0_2 | ||
- libllvm11=11.1.0=h9b2ccf5_0 | ||
- libpng=1.6.37=ha441bb4_0 | ||
- libsodium=1.0.18=hbcb3906_1 | ||
- libtiff=4.2.0=h87d7836_0 | ||
- libwebp=1.2.0=hacca55c_0 | ||
- libwebp-base=1.2.0=h9ed2024_0 | ||
- llvm-openmp=12.0.0=h0dcd299_1 | ||
- llvmlite=0.37.0=py38he4411ff_1 | ||
- lz4-c=1.9.3=h23ab428_1 | ||
- markupsafe=2.0.1=py38h9ed2024_0 | ||
- matplotlib=3.5.0=py38hecd8cb5_0 | ||
- matplotlib-base=3.5.0=py38h4f681db_0 | ||
- matplotlib-inline=0.1.3=pyhd8ed1ab_0 | ||
- mistune=0.8.4=py38h1de35cc_1001 | ||
- mkl=2021.4.0=hecd8cb5_637 | ||
- mkl-service=2.4.0=py38h9ed2024_0 | ||
- mkl_fft=1.3.1=py38h4ab4a9b_0 | ||
- mkl_random=1.2.2=py38hb2f4e1b_0 | ||
- munkres=1.1.4=py_0 | ||
- mypy_extensions=0.4.3=py38h50d1736_4 | ||
- nbclient=0.5.11=pyhd3eb1b0_0 | ||
- nbconvert=6.3.0=py38hecd8cb5_0 | ||
- nbformat=5.1.3=pyhd3eb1b0_0 | ||
- ncurses=6.3=hca72f7f_2 | ||
- nest-asyncio=1.5.4=pyhd8ed1ab_0 | ||
- notebook=6.4.8=py38hecd8cb5_0 | ||
- numba=0.54.1=py38hae1ba45_0 | ||
- numexpr=2.8.1=py38h2e5f0a9_0 | ||
- numpy=1.20.3=py38h4b4dc7a_0 | ||
- numpy-base=1.20.3=py38he0bd621_0 | ||
- olefile=0.46=pyhd3eb1b0_0 | ||
- openssl=1.1.1n=hca72f7f_0 | ||
- packaging=21.3=pyhd3eb1b0_0 | ||
- pandas=1.3.5=py38h743cdd8_0 | ||
- pandocfilters=1.5.0=pyhd3eb1b0_0 | ||
- parso=0.8.3=pyhd8ed1ab_0 | ||
- pathspec=0.9.0=pyhd8ed1ab_0 | ||
- patsy=0.5.2=py38hecd8cb5_1 | ||
- pexpect=4.8.0=pyh9f0ad1d_2 | ||
- pickleshare=0.7.5=py_1003 | ||
- pillow=8.4.0=py38h98e4679_0 | ||
- pip=21.2.4=py38hecd8cb5_0 | ||
- pipreqs=0.4.10=py_0 | ||
- platformdirs=2.5.0=pyhd8ed1ab_0 | ||
- plotly=5.6.0=pyhd3eb1b0_0 | ||
- prometheus_client=0.13.1=pyhd3eb1b0_0 | ||
- prompt-toolkit=3.0.27=pyha770c72_0 | ||
- ptyprocess=0.7.0=pyhd3deb0d_0 | ||
- pure_eval=0.2.2=pyhd8ed1ab_0 | ||
- pycparser=2.21=pyhd8ed1ab_0 | ||
- pygments=2.11.2=pyhd8ed1ab_0 | ||
- pyopenssl=22.0.0=pyhd8ed1ab_0 | ||
- pyparsing=3.0.4=pyhd3eb1b0_0 | ||
- pyrsistent=0.18.0=py38hca72f7f_0 | ||
- pysocks=1.7.1=py38h50d1736_4 | ||
- python=3.8.10=h88f2d9e_7 | ||
- python-dateutil=2.8.2=pyhd8ed1ab_0 | ||
- python_abi=3.8=2_cp38 | ||
- pyzmq=22.3.0=py38hd3b92b6_1 | ||
- readline=8.1.2=hca72f7f_1 | ||
- requests=2.27.1=pyhd8ed1ab_0 | ||
- scipy=1.7.3=py38h8c7af03_0 | ||
- seaborn=0.11.2=pyhd3eb1b0_0 | ||
- send2trash=1.8.0=pyhd3eb1b0_1 | ||
- setuptools=58.0.4=py38hecd8cb5_0 | ||
- six=1.16.0=pyh6c4a22f_0 | ||
- sqlalchemy=1.4.32=py38hca72f7f_0 | ||
- sqlite=3.37.2=h707629a_0 | ||
- stack_data=0.1.4=pyhd8ed1ab_0 | ||
- statsmodels=0.12.2=py38h9ed2024_0 | ||
- tbb=2021.5.0=haf03e11_0 | ||
- tenacity=8.0.1=py38hecd8cb5_0 | ||
- terminado=0.13.1=py38hecd8cb5_0 | ||
- testpath=0.5.0=pyhd3eb1b0_0 | ||
- tk=8.6.11=h7bc2e8c_0 | ||
- tomli=2.0.1=pyhd8ed1ab_0 | ||
- tornado=6.1=py38h96a0964_2 | ||
- traitlets=5.1.1=pyhd8ed1ab_0 | ||
- typed-ast=1.5.2=py38h96a0964_0 | ||
- typing_extensions=4.1.1=pyha770c72_0 | ||
- urllib3=1.26.8=pyhd8ed1ab_1 | ||
- wcwidth=0.2.5=pyh9f0ad1d_2 | ||
- webencodings=0.5.1=py38_1 | ||
- wheel=0.37.1=pyhd3eb1b0_0 | ||
- widgetsnbextension=3.5.2=py38hecd8cb5_0 | ||
- xz=5.2.5=h1de35cc_0 | ||
- yarg=0.1.9=py_1 | ||
- zeromq=4.3.4=he49afe7_1 | ||
- zipp=3.7.0=pyhd3eb1b0_0 | ||
- zlib=1.2.11=h4dc903c_4 | ||
- zstd=1.4.9=h322a384_0 | ||
- pip: | ||
- datetime==4.4 | ||
- imageio==2.16.0 | ||
- joblib==1.1.0 | ||
- pytz==2021.3 | ||
- savingfigr==1.0.3 | ||
- savingfigures==1.0.3 | ||
- scikit-learn==1.0.2 | ||
- sklearn==0.0 | ||
- threadpoolctl==3.1.0 | ||
- zope-interface==5.4.0 | ||
prefix: /Users/Semrau_Lab/opt/anaconda3/envs/math637 |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Logistic Regression parameter search\n", | ||
"\n", | ||
"Duncan Tulimieri" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# import libraries \n", | ||
"import numpy as np\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"plt.rcParams.update({'font.size': 16}) \n", | ||
"from sklearn.model_selection import GridSearchCV\n", | ||
"from sklearn.linear_model import LogisticRegression\n", | ||
"import warnings \n", | ||
"warnings.filterwarnings('ignore')\n", | ||
"import time \n", | ||
"import seaborn as sns\n", | ||
"# personal classes\n", | ||
"from ProcessData import ProcessForestData\n", | ||
"import savingfigR as sf" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"class pLogisticRegression(ProcessForestData):\n", | ||
" \n", | ||
" penalty_options = ['l1', 'l2', 'elasticnet', 'none']\n", | ||
" C_options = np.linspace(0.01, 1, 5)\n", | ||
" intercept_options = [True, False]\n", | ||
" l1_ratio_options = np.linspace(0, 1, 5)\n", | ||
" \n", | ||
" def __init__(self):\n", | ||
" # method calls \n", | ||
" self.X_train, self.X_test, self.y_train, self.y_test = self.load_data(perform_scale=True, sub_data_section='')\n", | ||
" self.un_classifiers = np.unique(self.y_train)\n", | ||
" # Raw data \n", | ||
" start = time.time()\n", | ||
" LR_trained_opt = self.optimize_LogisticRegression_params(self.X_train, self.y_train, self.penalty_options, self.C_options, self.intercept_options, self.l1_ratio_options)\n", | ||
" LR_score = self.score_LogisticRegression(LR_trained_opt, self.X_test, self.y_test)\n", | ||
" end = time.time()\n", | ||
" print(f'Raw data LogisticRegression optimal score = {LR_score}')\n", | ||
" print(f'Time taken = {end-start}')\n", | ||
"\n", | ||
" # model\n", | ||
" def train_LogisticRegression(self, X, y, penalty, C, fit_B0, l1_ratio):\n", | ||
" if penalty == 'elasticnet':\n", | ||
" return LogisticRegression(penalty=penalty, C=C, fit_intercept=fit_B0, l1_ratio=l1_ratio, n_jobs=4, solver='saga').fit(X, y)\n", | ||
" else: \n", | ||
" return LogisticRegression(penalty=penalty, C=C, fit_intercept=fit_B0, n_jobs=4, solver='saga').fit(X, y)\n", | ||
"\n", | ||
" def score_LogisticRegression(self, trained_LogisticRegression_model, X_test, y_test):\n", | ||
" return trained_LogisticRegression_model.score(X_test, y_test)\n", | ||
"\n", | ||
" def predict_LogisticRegression(self, trained_LogisticRegression_model, X_test):\n", | ||
" return trained_LogisticRegression_model.predict(X_test)\n", | ||
"\n", | ||
" def optimize_LogisticRegression_params(self, X_train, y_train, penalty_options=penalty_options, C_options=C_options, intercept_options=intercept_options, l1_ratio_options=l1_ratio_options, cv=10, scoring='accuracy'):\n", | ||
" LogisticRegression_raw = LogisticRegression()\n", | ||
" cv_train_model = GridSearchCV(LogisticRegression_raw, param_grid={'penalty':penalty_options, 'C': C_options, 'fit_intercept':intercept_options, 'l1_ratio':l1_ratio_options}, cv=cv, scoring=scoring).fit(X_train, y_train)\n", | ||
" print(f'Best LogisticRegression parameters: penalty = {cv_train_model.best_params_[\"penalty\"]}, C = {cv_train_model.best_params_[\"C\"]}, fit_intercept = {cv_train_model.best_params_[\"fit_intercept\"]}, l1_ratio = {cv_train_model.best_params_[\"l1_ratio\"]}')\n", | ||
" best_model = self.train_LogisticRegression(X_train, y_train, penalty=cv_train_model.best_params_[\"penalty\"], C=cv_train_model.best_params_[\"C\"], fit_B0=cv_train_model.best_params_[\"fit_intercept\"], l1_ratio=cv_train_model.best_params_[\"l1_ratio\"])\n", | ||
" return best_model " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Best LogisticRegression parameters: penalty = l2, C = 0.7525, fit_intercept = True, l1_ratio = 0.0\n", | ||
"Raw data LogisticRegression optimal score = 0.7166304078429213\n", | ||
"Time taken = 14484.176457881927\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"testLR = pLogisticRegression()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"interpreter": { | ||
"hash": "e5ced7bbea2155d302b976f4184419b8d40f50030e781605408c0dc76f430f24" | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3.8.10 ('math637')", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.10" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.