Skip to content

Commit

Permalink
initial commit of files
Browse files Browse the repository at this point in the history
  • Loading branch information
tulimid1 committed May 14, 2022
1 parent b617ae1 commit 982eae7
Show file tree
Hide file tree
Showing 54 changed files with 2,024 additions and 0 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file added Code/.DS_Store
Binary file not shown.
120 changes: 120 additions & 0 deletions Code/Blackard_Dean_99.ipynb

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions Code/CoverTypeProject.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
name: math637
channels:
- conda-forge
- defaults
dependencies:
- appnope=0.1.2=py38h50d1736_2
- argon2-cffi=20.1.0=py38h9ed2024_1
- asttokens=2.0.5=pyhd8ed1ab_0
- attrs=21.4.0=pyhd3eb1b0_0
- backcall=0.2.0=pyh9f0ad1d_0
- backports=1.0=py_2
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
- black=22.1.0=pyhd8ed1ab_0
- blas=1.0=mkl
- bleach=4.1.0=pyhd3eb1b0_0
- bottleneck=1.3.2=py38hf1fa96c_1
- brotli=1.0.9=hb1e8313_2
- brotlipy=0.7.0=py38h96a0964_1003
- ca-certificates=2022.3.29=hecd8cb5_1
- certifi=2021.10.8=py38hecd8cb5_2
- cffi=1.15.0=py38hc55c11b_1
- charset-normalizer=2.0.11=pyhd8ed1ab_0
- click=8.0.3=py38h50d1736_1
- cryptography=36.0.1=py38h56c4533_0
- cycler=0.11.0=pyhd3eb1b0_0
- dataclasses=0.8=pyhc8e2a94_3
- debugpy=1.5.1=py38ha048514_0
- decorator=5.1.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd3eb1b0_0
- docopt=0.6.2=py_1
- entrypoints=0.4=pyhd8ed1ab_0
- executing=0.8.2=pyhd8ed1ab_0
- fonttools=4.25.0=pyhd3eb1b0_0
- freetype=2.11.0=hd8bbffd_0
- giflib=5.2.1=haf1e3a3_0
- greenlet=1.1.1=py38h23ab428_0
- idna=3.3=pyhd8ed1ab_0
- importlib-metadata=4.8.2=py38hecd8cb5_0
- importlib_metadata=4.8.2=hd3eb1b0_0
- intel-openmp=2021.4.0=hecd8cb5_3538
- ipykernel=6.9.0=py38h5fd9f69_0
- ipympl=0.8.7=pyhd3eb1b0_0
- ipython=8.0.1=py38h50d1736_0
- ipython_genutils=0.2.0=pyhd3eb1b0_1
- ipywidgets=7.6.5=pyhd3eb1b0_1
- jedi=0.18.1=py38h50d1736_0
- jinja2=3.0.2=pyhd3eb1b0_0
- jpeg=9d=h9ed2024_0
- jsonschema=3.2.0=pyhd3eb1b0_2
- jupyter_client=7.1.2=pyhd8ed1ab_0
- jupyter_core=4.9.1=py38h50d1736_1
- jupyterlab_pygments=0.1.2=py_0
- jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
- kiwisolver=1.3.1=py38h23ab428_0
- lcms2=2.12=hf1fd2bf_0
- libcxx=12.0.0=h2f01273_0
- libffi=3.3=hb1e8313_2
- libgfortran=3.0.1=h93005f0_2
- libllvm11=11.1.0=h9b2ccf5_0
- libpng=1.6.37=ha441bb4_0
- libsodium=1.0.18=hbcb3906_1
- libtiff=4.2.0=h87d7836_0
- libwebp=1.2.0=hacca55c_0
- libwebp-base=1.2.0=h9ed2024_0
- llvm-openmp=12.0.0=h0dcd299_1
- llvmlite=0.37.0=py38he4411ff_1
- lz4-c=1.9.3=h23ab428_1
- markupsafe=2.0.1=py38h9ed2024_0
- matplotlib=3.5.0=py38hecd8cb5_0
- matplotlib-base=3.5.0=py38h4f681db_0
- matplotlib-inline=0.1.3=pyhd8ed1ab_0
- mistune=0.8.4=py38h1de35cc_1001
- mkl=2021.4.0=hecd8cb5_637
- mkl-service=2.4.0=py38h9ed2024_0
- mkl_fft=1.3.1=py38h4ab4a9b_0
- mkl_random=1.2.2=py38hb2f4e1b_0
- munkres=1.1.4=py_0
- mypy_extensions=0.4.3=py38h50d1736_4
- nbclient=0.5.11=pyhd3eb1b0_0
- nbconvert=6.3.0=py38hecd8cb5_0
- nbformat=5.1.3=pyhd3eb1b0_0
- ncurses=6.3=hca72f7f_2
- nest-asyncio=1.5.4=pyhd8ed1ab_0
- notebook=6.4.8=py38hecd8cb5_0
- numba=0.54.1=py38hae1ba45_0
- numexpr=2.8.1=py38h2e5f0a9_0
- numpy=1.20.3=py38h4b4dc7a_0
- numpy-base=1.20.3=py38he0bd621_0
- olefile=0.46=pyhd3eb1b0_0
- openssl=1.1.1n=hca72f7f_0
- packaging=21.3=pyhd3eb1b0_0
- pandas=1.3.5=py38h743cdd8_0
- pandocfilters=1.5.0=pyhd3eb1b0_0
- parso=0.8.3=pyhd8ed1ab_0
- pathspec=0.9.0=pyhd8ed1ab_0
- patsy=0.5.2=py38hecd8cb5_1
- pexpect=4.8.0=pyh9f0ad1d_2
- pickleshare=0.7.5=py_1003
- pillow=8.4.0=py38h98e4679_0
- pip=21.2.4=py38hecd8cb5_0
- pipreqs=0.4.10=py_0
- platformdirs=2.5.0=pyhd8ed1ab_0
- plotly=5.6.0=pyhd3eb1b0_0
- prometheus_client=0.13.1=pyhd3eb1b0_0
- prompt-toolkit=3.0.27=pyha770c72_0
- ptyprocess=0.7.0=pyhd3deb0d_0
- pure_eval=0.2.2=pyhd8ed1ab_0
- pycparser=2.21=pyhd8ed1ab_0
- pygments=2.11.2=pyhd8ed1ab_0
- pyopenssl=22.0.0=pyhd8ed1ab_0
- pyparsing=3.0.4=pyhd3eb1b0_0
- pyrsistent=0.18.0=py38hca72f7f_0
- pysocks=1.7.1=py38h50d1736_4
- python=3.8.10=h88f2d9e_7
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python_abi=3.8=2_cp38
- pyzmq=22.3.0=py38hd3b92b6_1
- readline=8.1.2=hca72f7f_1
- requests=2.27.1=pyhd8ed1ab_0
- scipy=1.7.3=py38h8c7af03_0
- seaborn=0.11.2=pyhd3eb1b0_0
- send2trash=1.8.0=pyhd3eb1b0_1
- setuptools=58.0.4=py38hecd8cb5_0
- six=1.16.0=pyh6c4a22f_0
- sqlalchemy=1.4.32=py38hca72f7f_0
- sqlite=3.37.2=h707629a_0
- stack_data=0.1.4=pyhd8ed1ab_0
- statsmodels=0.12.2=py38h9ed2024_0
- tbb=2021.5.0=haf03e11_0
- tenacity=8.0.1=py38hecd8cb5_0
- terminado=0.13.1=py38hecd8cb5_0
- testpath=0.5.0=pyhd3eb1b0_0
- tk=8.6.11=h7bc2e8c_0
- tomli=2.0.1=pyhd8ed1ab_0
- tornado=6.1=py38h96a0964_2
- traitlets=5.1.1=pyhd8ed1ab_0
- typed-ast=1.5.2=py38h96a0964_0
- typing_extensions=4.1.1=pyha770c72_0
- urllib3=1.26.8=pyhd8ed1ab_1
- wcwidth=0.2.5=pyh9f0ad1d_2
- webencodings=0.5.1=py38_1
- wheel=0.37.1=pyhd3eb1b0_0
- widgetsnbextension=3.5.2=py38hecd8cb5_0
- xz=5.2.5=h1de35cc_0
- yarg=0.1.9=py_1
- zeromq=4.3.4=he49afe7_1
- zipp=3.7.0=pyhd3eb1b0_0
- zlib=1.2.11=h4dc903c_4
- zstd=1.4.9=h322a384_0
# Packages installed with pip after the conda packages above are resolved.
# They must be nested under the `pip:` key; at the same level they would be
# read as conda specs and `pip` itself would be an empty entry.
- pip:
  - datetime==4.4
  - imageio==2.16.0
  - joblib==1.1.0
  - pytz==2021.3
  - savingfigr==1.0.3
  - savingfigures==1.0.3
  - scikit-learn==1.0.2
  # The `sklearn==0.0` placeholder pin was dropped: it is a deprecated PyPI
  # alias that no longer installs, and scikit-learn is already pinned above.
  - threadpoolctl==3.1.0
  - zope-interface==5.4.0
prefix: /Users/Semrau_Lab/opt/anaconda3/envs/math637
170 changes: 170 additions & 0 deletions Code/KNN_param_search.ipynb

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions Code/LDA_param_search.ipynb

Large diffs are not rendered by default.

124 changes: 124 additions & 0 deletions Code/LogisticReg_param_search.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Logistic Regression parameter search\n",
"\n",
"Duncan Tulimieri"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# import libraries \n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams.update({'font.size': 16}) \n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.linear_model import LogisticRegression\n",
"import warnings \n",
"warnings.filterwarnings('ignore')\n",
"import time \n",
"import seaborn as sns\n",
"# personal classes\n",
"from ProcessData import ProcessForestData\n",
"import savingfigR as sf"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class pLogisticRegression(ProcessForestData):\n",
"    \"\"\"Grid-search, fit and score a scikit-learn LogisticRegression.\n",
"\n",
"    The train/test split comes from self.load_data. Every estimator is built\n",
"    with the 'saga' solver, the one scikit-learn solver that accepts all four\n",
"    penalties in penalty_options, so the cross-validated search and the model\n",
"    that is finally returned are the same kind of estimator.\n",
"    \"\"\"\n",
"\n",
"    # Default hyper-parameter candidates explored by the grid search.\n",
"    penalty_options = ['l1', 'l2', 'elasticnet', 'none']\n",
"    C_options = np.linspace(0.01, 1, 5)\n",
"    intercept_options = [True, False]\n",
"    l1_ratio_options = np.linspace(0, 1, 5)\n",
"    # Estimator settings shared by the search and by train_LogisticRegression.\n",
"    _solver = 'saga'\n",
"    _n_jobs = 4\n",
"\n",
"    def __init__(self):\n",
"        \"\"\"Load the data, run the parameter search and print test score and elapsed seconds.\"\"\"\n",
"        # method calls\n",
"        self.X_train, self.X_test, self.y_train, self.y_test = self.load_data(perform_scale=True, sub_data_section='')\n",
"        self.un_classifiers = np.unique(self.y_train)\n",
"        # Raw data\n",
"        start = time.time()\n",
"        LR_trained_opt = self.optimize_LogisticRegression_params(self.X_train, self.y_train, self.penalty_options, self.C_options, self.intercept_options, self.l1_ratio_options)\n",
"        LR_score = self.score_LogisticRegression(LR_trained_opt, self.X_test, self.y_test)\n",
"        end = time.time()\n",
"        print(f'Raw data LogisticRegression optimal score = {LR_score}')\n",
"        print(f'Time taken = {end-start}')\n",
"\n",
"    # model\n",
"    def train_LogisticRegression(self, X, y, penalty, C, fit_B0, l1_ratio):\n",
"        \"\"\"Fit and return a saga LogisticRegression on (X, y).\n",
"\n",
"        l1_ratio is forwarded only when penalty == 'elasticnet'; for every\n",
"        other penalty it is ignored. fit_B0 maps to fit_intercept.\n",
"        \"\"\"\n",
"        model_kwargs = dict(penalty=penalty, C=C, fit_intercept=fit_B0, n_jobs=self._n_jobs, solver=self._solver)\n",
"        if penalty == 'elasticnet':\n",
"            model_kwargs['l1_ratio'] = l1_ratio\n",
"        return LogisticRegression(**model_kwargs).fit(X, y)\n",
"\n",
"    def score_LogisticRegression(self, trained_LogisticRegression_model, X_test, y_test):\n",
"        \"\"\"Return trained_LogisticRegression_model.score(X_test, y_test).\"\"\"\n",
"        return trained_LogisticRegression_model.score(X_test, y_test)\n",
"\n",
"    def predict_LogisticRegression(self, trained_LogisticRegression_model, X_test):\n",
"        \"\"\"Return the fitted model's predictions for X_test.\"\"\"\n",
"        return trained_LogisticRegression_model.predict(X_test)\n",
"\n",
"    def optimize_LogisticRegression_params(self, X_train, y_train, penalty_options=penalty_options, C_options=C_options, intercept_options=intercept_options, l1_ratio_options=l1_ratio_options, cv=10, scoring='accuracy'):\n",
"        \"\"\"Cross-validate the candidate settings and return the best fitted model.\n",
"\n",
"        One sub-grid is built per penalty so that only meaningful combinations\n",
"        are fitted: l1_ratio is varied for 'elasticnet' only, and C is not\n",
"        varied for the unpenalised model, where it has no effect.\n",
"\n",
"        Returns the estimator that GridSearchCV refits on all of X_train with\n",
"        the best parameters (its best_estimator_).\n",
"        \"\"\"\n",
"        param_grid = []\n",
"        for penalty in penalty_options:\n",
"            sub_grid = {'penalty': [penalty], 'fit_intercept': intercept_options}\n",
"            if penalty not in ('none', None):\n",
"                sub_grid['C'] = C_options\n",
"            if penalty == 'elasticnet':\n",
"                sub_grid['l1_ratio'] = l1_ratio_options\n",
"            param_grid.append(sub_grid)\n",
"        # The default lbfgs solver rejects 'l1' and 'elasticnet'; those fits would\n",
"        # fail and score NaN, so the search must use the same solver as the final model.\n",
"        LogisticRegression_raw = LogisticRegression(solver=self._solver, n_jobs=self._n_jobs)\n",
"        cv_train_model = GridSearchCV(LogisticRegression_raw, param_grid=param_grid, cv=cv, scoring=scoring).fit(X_train, y_train)\n",
"        best_params = cv_train_model.best_params_\n",
"        best_penalty = best_params['penalty']\n",
"        # C and l1_ratio are absent from best_params when they do not apply.\n",
"        best_C = best_params.get('C')\n",
"        best_intercept = best_params['fit_intercept']\n",
"        best_l1_ratio = best_params.get('l1_ratio')\n",
"        print(f'Best LogisticRegression parameters: penalty = {best_penalty}, C = {best_C}, fit_intercept = {best_intercept}, l1_ratio = {best_l1_ratio}')\n",
"        # refit=True (the default) has already trained this estimator on the full training set.\n",
"        return cv_train_model.best_estimator_"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best LogisticRegression parameters: penalty = l2, C = 0.7525, fit_intercept = True, l1_ratio = 0.0\n",
"Raw data LogisticRegression optimal score = 0.7166304078429213\n",
"Time taken = 14484.176457881927\n"
]
}
],
"source": [
"testLR = pLogisticRegression()"
]
}
],
"metadata": {
"interpreter": {
"hash": "e5ced7bbea2155d302b976f4184419b8d40f50030e781605408c0dc76f430f24"
},
"kernelspec": {
"display_name": "Python 3.8.10 ('math637')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 982eae7

Please sign in to comment.