diff --git a/healpix_gaia_query.ipynb b/notebooks/error_field/healpix_gaia_query.ipynb similarity index 99% rename from healpix_gaia_query.ipynb rename to notebooks/error_field/healpix_gaia_query.ipynb index 2b4561c7..947d5313 100644 --- a/healpix_gaia_query.ipynb +++ b/notebooks/error_field/healpix_gaia_query.ipynb @@ -2,189 +2,56 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created TAP+ (v1.2.1) - Connection:\n", - "\tHost: gea.esac.esa.int\n", - "\tUse HTTPS: True\n", - "\tPort: 443\n", - "\tSSL Port: 443\n", - "Created TAP+ (v1.2.1) - Connection:\n", - "\tHost: geadata.esac.esa.int\n", - "\tUse HTTPS: True\n", - "\tPort: 443\n", - "\tSSL Port: 443\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/cal/ccarr/anaconda3/lib/python3.7/site-packages/pandas/compat/_optional.py:138: UserWarning: Pandas requires version '2.7.0' or newer of 'numexpr' (version '2.6.9' currently installed).\n", - " warnings.warn(msg, UserWarning)\n" + "ename": "ModuleNotFoundError", + "evalue": "No module named 'seaborn'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/k1/2dc5x9j10c553vvp6f40r89h0000gn/T/ipykernel_72567/449606315.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minterpolate\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0minterp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mscipy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msignal\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mseaborn\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'seaborn'" ] } ], "source": [ - "import warnings\n", - "import healpy as hp\n", - "from astroquery.gaia import Gaia\n", - "import tqdm\n", + "# BUILT-IN\n", "import pickle\n", - "from astropy import table\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", + "import warnings\n", + "\n", + "# THIRD PARTY\n", "import astropy.coordinates as coord\n", "import astropy.units as u\n", + "import healpy as hp\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import scipy.interpolate as interp\n", + "import scipy.signal as sig\n", + "import seaborn as sns\n", "import sklearn\n", - "from sklearn import metrics\n", - "from sklearn.svm import SVR\n", - "from sklearn import linear_model\n", - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.model_selection import learning_curve\n", + "import tqdm\n", + "from astropy import table\n", + "from astroquery.gaia import Gaia\n", + "from scipy.stats import gaussian_kde\n", + "from sklearn import linear_model, metrics\n", + "from 
sklearn.gaussian_process import GaussianProcessRegressor\n", + "from sklearn.gaussian_process.kernels import ExpSineSquared, WhiteKernel\n", "from sklearn.kernel_ridge import KernelRidge\n", + "from sklearn.model_selection import GridSearchCV, learning_curve\n", "from sklearn.svm import SVR\n", - "from sklearn.utils import shuffle\n", - "from sklearn.gaussian_process import GaussianProcessRegressor\n", - "from sklearn.gaussian_process.kernels import WhiteKernel, ExpSineSquared\n", - "from sklearn.utils import shuffle\n", - "import scipy.signal as sig\n", - "import seaborn as sns\n", - "import scipy.interpolate as interp\n", - "from scipy.stats import gaussian_kde" + "from sklearn.utils import shuffle" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def plot_parallax_prediction(Xtrue, ytrue, kde, ypred1, ypred2, ypred3, fids):\n", - " \"\"\"\"\"\"\n", - " fig = plt.figure(figsize=(10,8))\n", - " ax = fig.add_subplot(\n", - " xlabel=r\"$\\log_{10}$ parallax [mas]\",\n", - " ylabel=r\"$\\log_{10}$ parallax fractional error\",\n", - " )\n", - " # distance label\n", - " secax = ax.secondary_xaxis(\n", - " \"top\",\n", - " functions=(\n", - " lambda logp: np.log10(\n", - " coord.Distance(parallax=10 ** logp * u.mas).to_value(u.pc)\n", - " ),\n", - " lambda logd: np.log10(\n", - " coord.Distance(10 ** logd * u.pc).parallax.to_value(u.mas)\n", - " ),\n", - " ),\n", - " )\n", - " secax.set_xlabel(r\"$\\log_{10}$ Distance [kpc]\")\n", - " \n", - " Xpred = np.array(\n", - " [\n", - " np.ones(100) * np.median(Xtrue[:, 0]), # ra\n", - " np.ones(100) * np.median(Xtrue[:, 1]), # dec\n", - " np.linspace(Xtrue[:, 2].min(), Xtrue[:, 2].max(), 100), # p\n", - " ]\n", - " ).T\n", - "\n", - " ax.scatter(Xtrue[:, -1], ytrue, s=5, label=\"data\", alpha=0.3, c=kde)\n", - " ax.scatter(Xpred[:, -1], ypred1, s=5, label=\"kernel-ridge\")\n", - " ax.scatter(Xpred[:, -1], ypred2, s=5, label=\"linear model: density-weighting\")\n", - " ax.scatter(Xpred[:, -1], ypred3, s=5, label=\"linear model: no density weight\")\n", - " ax.set_title(str(fids))\n", - " \n", - " ax.set_ylim(-3, 3)\n", - " ax.invert_xaxis()\n", - " ax.legend()\n", - "\n", - " return fig\n", - "\n", - "def kernel_ridge(X, y, train_size):\n", - " \"Kernel-Ridge Regression code\"\n", - " rng = np.random.default_rng()\n", - " kr = GridSearchCV(\n", - " KernelRidge(kernel=\"linear\", gamma=0.1),\n", - " param_grid={\n", - " \"alpha\": [1e0, 0.1, 1e-2, 1e-3],\n", - " \"gamma\": np.logspace(-2, 2, 5),\n", - " },\n", - " )\n", - " \n", - " # randomize the data order\n", - " idx = shuffle(np.arange(0, len(X)), n_samples=train_size)\n", - "\n", - " # Fitting using the Kernel-Ridge Regression\n", - " kr.fit(X[idx], y[idx])\n", - " Xp = np.array(\n", - " [\n", - " np.ones(100) * np.median(X[:, 0]), # ra\n", - " np.ones(100) * np.median(X[:, 1]), # dec\n", - " np.linspace(X[:, 2].min(), X[:, 2].max(), 100), # p\n", - " ]\n", - " ).T\n", - " ykr = kr.predict(Xp)\n", - " return ykr, kr\n", - "\n", - "def Gauss_process(X,y, train_size):\n", - " \"Gaussian-Process Regression code\"\n", - " rng = np.random.default_rng()\n", - " idx = shuffle(np.arange(0, len(X)), n_samples=train_size)\n", - " gpr = GaussianProcessRegressor(kernel=None)\n", - " gpr.fit(X[idx], y[idx])\n", - " ygp = gpr.predict(Xp)\n", - " return ygp, gpr\n", - "\n", - "def support_vector(X,y, train_size):\n", - " \"support-vector regression code\"\n", - " rng = np.random.default_rng()\n", - " svr = 
GridSearchCV(SVR(kernel='linear', gamma=0.1),\n", - " param_grid={\"C\": [1e0, 1e1, 1e2, 1e3],\n", - " \"gamma\": np.logspace(-2, 2, 5)})\n", - " \n", - " # randomize the data order\n", - " idx = shuffle(np.arange(0, len(X)), n_samples=train_size)\n", - "\n", - " # Fitting using the Kernel-Ridge Regression\n", - " kr.fit(X[idx], y[idx])\n", - " Xp = np.array(\n", - " [\n", - " np.ones(100) * np.median(X[:, 0]), # ra\n", - " np.ones(100) * np.median(X[:, 1]), # dec\n", - " np.linspace(X[:, 2].min(), X[:, 2].max(), 100), # p\n", - " ]\n", - " ).T\n", - " svr.fit(X[idx], y[idx])\n", - " ysv = svr.predict(Xp)\n", - " return ysv, svr\n", - "\n", - "def linear(X, y, train_size, weight=True):\n", - " \"linear regression model\"\n", - " reg = linear_model.LinearRegression()\n", - " \n", - " # randomize the data order\n", - " idx = shuffle(np.arange(0, len(X)), n_samples=train_size)\n", - " xy = np.vstack([X[:,2],y])\n", - " kde = gaussian_kde(xy)(xy)\n", - " if weight==True:\n", - " reg.fit(X[idx], y[idx], sample_weight=(1/kde)[idx])\n", - " else:\n", - " reg.fit(X[idx], y[idx])\n", - " Xp = np.array(\n", - " [\n", - " np.ones(100) * np.median(X[:, 0]), # ra\n", - " np.ones(100) * np.median(X[:, 1]), # dec\n", - " np.linspace(X[:, 2].min(), X[:, 2].max(), 100), # p\n", - " ]\n", - " ).T\n", - " yreg = reg.predict(Xp)\n", - " return yreg, reg " + "hp.order2nside(4)" ] }, { @@ -196,7 +63,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", " 0%| | 0/4 [00:00= 0\n", " AND random_index < 1000000\n", - " \"\"\", dump_to_file=False, verbose=False, )\n", + " \"\"\",\n", + " dump_to_file=False,\n", + " verbose=False,\n", + " )\n", " r = job.get_results()\n", " rgr = r.group_by(\"healpix4\")\n", " print(rgr)\n", - " \n", + "\n", " NPIX = hp.nside2npix(NSIDE)\n", " m = np.arange(NPIX)\n", - " m[setofids[0]:setofids[-1]] = m.max()\n", - " hp.mollview(m, title=\"Mollview image RING\", nest=True, coord=[\"C\"], cbar=False, cmap=cm)\n", - " \n", - " for j in range(0,len(setofids)):\n", - " rg = rgr[rgr['healpix4']==setofids[j]]\n", - " \n", + " m[setofids[0] : setofids[-1]] = m.max()\n", + " hp.mollview(\n", + " m,\n", + " title=\"Mollview image RING\",\n", + " nest=True,\n", + " coord=[\"C\"],\n", + " cbar=False,\n", + " cmap=cm,\n", + " )\n", + "\n", + " for j in range(0, len(setofids)):\n", + " rg = rgr[rgr[\"healpix4\"] == setofids[j]]\n", + "\n", " print(setofids[j], len(rg))\n", "\n", " # DOING STUFF HERE\n", - " #with catch_warnings(UserWarning):\n", + " # with catch_warnings(UserWarning):\n", " df = table.QTable(rg)\n", "\n", " df = df[np.isfinite(df[\"parallax\"])] # filter out NaN\n", @@ -957,23 +836,24 @@ " df[\"parallax_frac_error\"] = df[\"parallax_error\"] / df[\"parallax\"]\n", "\n", " X = np.array(\n", - " [\n", - " df[\"ra\"].to_value(u.deg),\n", - " df[\"dec\"].to_value(u.deg),\n", - " np.log10(df[\"parallax\"].to_value(u.mas)),\n", - " ]).T\n", - " y = np.log10(df[\"parallax_frac_error\"].value.reshape(-1, 1))[:,0]\n", + " [\n", + " df[\"ra\"].to_value(u.deg),\n", + " df[\"dec\"].to_value(u.deg),\n", + " np.log10(df[\"parallax\"].to_value(u.mas)),\n", + " ]\n", + " ).T\n", + " y = np.log10(df[\"parallax_frac_error\"].value.reshape(-1, 1))[:, 0]\n", "\n", - " xy = np.vstack([X[:,2],y])\n", + " xy = np.vstack([X[:, 2], y])\n", " kde = gaussian_kde(xy)(xy)\n", "\n", - " ykr, kr = kernel_ridge(X, y, train_size=int(len(rg)*0.8))\n", - " #ygp, gpr = Gauss_process(X,y, train_size)\n", - " ysv, svr = support_vector(X,y, train_size=int(len(rg)*0.8))\n", - " yreg, reg = linear(X, y, 
train_size=int(len(rg)*0.8))\n", - " yreg1, reg1 = linear(X, y, train_size=int(len(rg)*0.8), weight=False)\n", - " \n", - " with open(\"pk_reg/pk_\"+str(setofids[j])+\".pkl\", mode=\"wb\") as f:\n", + " ykr, kr = kernel_ridge(X, y, train_size=int(len(rg) * 0.8))\n", + " # ygp, gpr = Gauss_process(X,y, train_size)\n", + " ysv, svr = support_vector(X, y, train_size=int(len(rg) * 0.8))\n", + " yreg, reg = linear(X, y, train_size=int(len(rg) * 0.8))\n", + " yreg1, reg1 = linear(X, y, train_size=int(len(rg) * 0.8), weight=False)\n", + "\n", + " with open(\"pk_reg/pk_\" + str(setofids[j]) + \".pkl\", mode=\"wb\") as f:\n", " pickle.dump(reg, f)\n", "\n", " plot_parallax_prediction(X, y, kde, ykr, yreg, yreg1, setofids[j])" @@ -985,9 +865,9 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"pk_\"+str(setofids[-1])+\".pkl\", mode=\"rb\") as f:\n", + "with open(\"pk_\" + str(setofids[-1]) + \".pkl\", mode=\"rb\") as f:\n", " testreg = pickle.load(f)\n", - " \n", + "\n", "Xp = np.array(\n", " [\n", " np.ones(100) * np.median(X[:, 0]), # ra\n", @@ -996,8 +876,8 @@ " ]\n", ").T\n", "yreg = testreg.predict(Xp)\n", - " \n", - "fig = plt.figure(figsize=(10,8))\n", + "\n", + "fig = plt.figure(figsize=(10, 8))\n", "ax = fig.add_subplot(\n", " xlabel=r\"$\\log_{10}$ parallax [mas]\",\n", " ylabel=r\"$\\log_{10}$ parallax fractional error\",\n", @@ -1017,18 +897,18 @@ "secax.set_xlabel(r\"$\\log_{10}$ Distance [kpc]\")\n", "\n", "Xpred = np.array(\n", - "[\n", - " np.ones(100) * np.median(X[:, 0]), # ra\n", - " np.ones(100) * np.median(X[:, 1]), # dec\n", - " np.linspace(X[:, 2].min(), X[:, 2].max(), 100), # p\n", - "]\n", + " [\n", + " np.ones(100) * np.median(X[:, 0]), # ra\n", + " np.ones(100) * np.median(X[:, 1]), # dec\n", + " np.linspace(X[:, 2].min(), X[:, 2].max(), 100), # p\n", + " ]\n", ").T\n", "\n", "ax.scatter(X[:, -1], y, s=2, label=\"data\", alpha=0.3, c=kde)\n", - "#ax.scatter(Xpred[:, -1], ypred1, s=2, label=\"kernel-ridge\")\n", - "#ax.scatter(Xpred[:, -1], ypred2, s=2, label=\"linear model: density-weighting\")\n", + "# ax.scatter(Xpred[:, -1], ypred1, s=2, label=\"kernel-ridge\")\n", + "# ax.scatter(Xpred[:, -1], ypred2, s=2, label=\"linear model: density-weighting\")\n", "ax.scatter(Xpred[:, -1], yreg, s=2)\n", - "#ax.set_title(str(fids))\n", + "# ax.set_title(str(fids))\n", "\n", "ax.set_ylim(-3, 3)\n", "ax.invert_xaxis()\n", @@ -1066,9 +946,10 @@ } ], "source": [ - "groups_of_setofids = [(807,808,809,810)]\n", + "groups_of_setofids = [(807, 808, 809, 810)]\n", "for setofids in tqdm.tqdm(groups_of_setofids):\n", - " job = Gaia.launch_job_async(f\"\"\"\n", + " job = Gaia.launch_job_async(\n", + " f\"\"\"\n", " SELECT\n", " source_id, GAIA_HEALPIX_INDEX(5, source_id) AS healpix5,\n", " parallax AS parallax, parallax_error AS parallax_error,\n", @@ -1080,9 +961,12 @@ " WHERE GAIA_HEALPIX_INDEX(5, source_id) IN {setofids}\n", " AND parallax >= 0\n", " AND random_index < 2000000\n", - " \"\"\", dump_to_file=False, verbose=False, )\n", + " \"\"\",\n", + " dump_to_file=False,\n", + " verbose=False,\n", + " )\n", " r = job.get_results()\n", - " #rgr = r.group_by(\"healpix7\")" + " # rgr = r.group_by(\"healpix7\")" ] }, { @@ -1108,8 +992,8 @@ } ], "source": [ - "NSIDE = hp.order2nside(7) # converts norder to nside\n", - "cm = plt.set_cmap('inferno')\n", + "NSIDE = hp.order2nside(7) # converts norder to nside\n", + "cm = plt.set_cmap(\"inferno\")\n", "print(\n", " \"Approximate resolution at NSIDE {} is {:.2} deg\".format(\n", " NSIDE, hp.nside2resol(NSIDE, arcmin=True) / 60\n", @@ 
-1136,7 +1020,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1150,9 +1034,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.5" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }
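
Review note: the cells this patch reformats all revolve around one pattern — ask the Gaia archive for a random subsample of sources pixel by pixel via the archive's `GAIA_HEALPIX_INDEX` ADQL function, then group the returned table by pixel id. A minimal sketch of that pattern, assuming the `gaiadr2.gaia_source` table and a reduced column set; the helper name `fetch_healpix_group` is mine, not the notebook's, and the query needs network access to the Gaia archive:

```python
# Sketch (not the notebook's code): pull a parallax subsample for a tuple of
# HEALPix pixels and return it grouped by pixel id. Requires network access.
import healpy as hp
from astroquery.gaia import Gaia


def fetch_healpix_group(setofids, order=4, max_random_index=1_000_000):
    """Query gaiadr2.gaia_source for the pixels in `setofids` at HEALPix `order`."""
    job = Gaia.launch_job_async(
        f"""
        SELECT
            source_id, GAIA_HEALPIX_INDEX({order}, source_id) AS healpix,
            parallax, parallax_error, ra, dec
        FROM gaiadr2.gaia_source
        WHERE GAIA_HEALPIX_INDEX({order}, source_id) IN {setofids}
            AND parallax >= 0
            AND random_index < {max_random_index}
        """,
        dump_to_file=False,
        verbose=False,
    )
    r = job.get_results()
    return r.group_by("healpix")  # one sub-table per queried pixel


# HEALPix bookkeeping as in the notebook: order 4 -> NSIDE 16 -> 3072 pixels.
NSIDE = hp.order2nside(4)
print(hp.nside2npix(NSIDE), "pixels,",
      hp.nside2resol(NSIDE, arcmin=True) / 60, "deg resolution")
```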
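
Before fitting, each pass of the loop draws a Mollweide overview so it is clear which pixels the current query covered: it shades the queried range of an `np.arange(NPIX)` map and renders it with `hp.mollview` in NESTED ordering. A small stand-alone sketch of that plot; the pixel ids below are illustrative, not the notebook's:

```python
# Sketch: highlight a handful of HEALPix pixels on a Mollweide map, in the
# spirit of the notebook's per-group overview plot. Pixel ids are illustrative.
import healpy as hp
import matplotlib.pyplot as plt
import numpy as np

NSIDE = hp.order2nside(4)
NPIX = hp.nside2npix(NSIDE)

setofids = (1208, 1209, 1210, 1211)   # hypothetical order-4 pixel ids
m = np.arange(NPIX)                   # graded background so pixel positions are visible
m[list(setofids)] = m.max()           # queried pixels sit at the top of the color scale

hp.mollview(m, title="Queried HEALPix pixels (NESTED)", nest=True,
            coord=["C"], cbar=False, cmap=plt.get_cmap("inferno"))
plt.show()
```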
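
The per-pixel fit the loop calls through `linear(...)` (one of the helpers this diff removes from the notebook itself) boils down to: build `X = [ra, dec, log10 parallax]` and `y = log10(parallax_error / parallax)`, evaluate a 2-D `gaussian_kde` over `(log10 parallax, y)`, and fit `LinearRegression` with `sample_weight = 1 / kde` so the densely populated part of the diagram does not dominate the fit. A sketch on synthetic data — every number below is made up for illustration:

```python
# Minimal sketch of the density-weighted linear fit; synthetic data only.
import numpy as np
from scipy.stats import gaussian_kde
from sklearn import linear_model
from sklearn.utils import shuffle

rng = np.random.default_rng(42)
n = 2000
log_plx = rng.normal(0.0, 0.5, n)                          # log10 parallax [mas]
log_frac_err = -1.5 - 0.9 * log_plx + rng.normal(0, 0.2, n)
X = np.c_[rng.uniform(0, 360, n),                          # ra  [deg]
          rng.uniform(-90, 90, n),                         # dec [deg]
          log_plx]
y = log_frac_err

# KDE over (log parallax, log fractional error); 1/kde down-weights the
# densely populated core of the diagram.
xy = np.vstack([X[:, 2], y])
kde = gaussian_kde(xy)(xy)

train_size = int(0.8 * len(X))
idx = shuffle(np.arange(len(X)), n_samples=train_size, random_state=0)

reg = linear_model.LinearRegression()
reg.fit(X[idx], y[idx], sample_weight=(1 / kde)[idx])

# Predict along a parallax grid at the median sky position, as the plots do.
Xp = np.c_[np.full(100, np.median(X[:, 0])),
           np.full(100, np.median(X[:, 1])),
           np.linspace(X[:, 2].min(), X[:, 2].max(), 100)]
yreg = reg.predict(Xp)
```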
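
Each fitted regressor is then pickled per pixel (the dump cell uses a `pk_reg/pk_<pixel>.pkl` naming) and reloaded in a later cell to redraw the prediction curve. A minimal round-trip sketch; the inline fit exists only to make the example self-contained, and the pixel id is illustrative:

```python
# Sketch: persist a fitted per-pixel regressor and reload it, following the
# dump cell's "pk_reg/pk_<pixel>.pkl" naming. The inline fit is a placeholder.
import pickle
from pathlib import Path

import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))                  # stand-in for [ra, dec, log10 plx]
y = X @ np.array([0.0, 0.0, -0.9]) - 1.5       # stand-in for log10 fractional error

reg = linear_model.LinearRegression().fit(X, y)

pixel_id = 810                                  # illustrative pixel id
Path("pk_reg").mkdir(exist_ok=True)
path = Path("pk_reg") / f"pk_{pixel_id}.pkl"

with open(path, mode="wb") as f:
    pickle.dump(reg, f)

with open(path, mode="rb") as f:
    testreg = pickle.load(f)

# The reloaded model reproduces the original predictions.
assert np.allclose(testreg.predict(X), reg.predict(X))
```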