|
32 | 32 | "import numpy as np\n",
|
33 | 33 | "import matplotlib.pyplot as plt\n",
|
34 | 34 | "import seaborn as sns\n",
|
| 35 | + "\n", |
35 | 36 | "sns.set_style(\"ticks\")\n",
|
36 | 37 | "sns.set_context(\"notebook\")\n",
|
37 | 38 | "\n",
|
38 | 39 | "from urllib.request import urlretrieve\n",
|
39 |
| - "data_url_dihedrals =\"https://figshare.com/ndownloader/files/36359700\"\n", |
| 40 | + "\n", |
| 41 | + "data_url_dihedrals = \"https://figshare.com/ndownloader/files/36359700\"\n", |
40 | 42 | "path_dihedrals = \"./cln025traj_dihedrals_decimated_equilibrated.npy\"\n",
|
41 |
| - "data_url_distances =\"https://figshare.com/ndownloader/files/36359697\"\n", |
| 43 | + "data_url_distances = \"https://figshare.com/ndownloader/files/36359697\"\n", |
42 | 44 | "path_distances = \"./cln025traj_distances_decimated_equilibrated.npy\""
|
43 | 45 | ]
|
44 | 46 | },
|
|
79 | 81 | }
|
80 | 82 | ],
|
81 | 83 | "source": [
|
82 |
| - "#download dihedral representation data from Figshare\n", |
| 84 | + "# download dihedral representation data from Figshare\n", |
83 | 85 | "urlretrieve(data_url_dihedrals, path_dihedrals)"
|
84 | 86 | ]
|
85 | 87 | },
|
|
92 | 94 | "source": [
|
93 | 95 | "all_dihedrals = np.load(path_dihedrals)\n",
|
94 | 96 | "# dihedral names in order:\n",
|
95 |
| - "# phi1 phi2 phi3 phi4 phi5 phi6 phi7 phi8 phi9 \n", |
96 |
| - "# psi1 psi2 psi3 psi4 psi5 psi6 psi7 psi8 psi9 \n", |
97 |
| - "# chi1_1 chi1_2 chi1_3 chi1_5 chi1_6 chi1_8 chi1_9 chi1_10 \n", |
| 97 | + "# phi1 phi2 phi3 phi4 phi5 phi6 phi7 phi8 phi9\n", |
| 98 | + "# psi1 psi2 psi3 psi4 psi5 psi6 psi7 psi8 psi9\n", |
| 99 | + "# chi1_1 chi1_2 chi1_3 chi1_5 chi1_6 chi1_8 chi1_9 chi1_10\n", |
98 | 100 | "# chi2_1 chi2_2 chi2_3 chi2_5 chi2_9 chi2_10"
|
99 | 101 | ]
|
100 | 102 | },
|
|
113 | 115 | }
|
114 | 116 | ],
|
115 | 117 | "source": [
|
116 |
| - "# we then select a subset of 15 dihedrals identified as the most informative \n", |
| 118 | + "# we then select a subset of 15 dihedrals identified as the most informative\n", |
117 | 119 | "# using the information imbalance greedy optimisation of (Glielmo et al., PNAS Nexus, 2022)\n",
|
118 | 120 | "# the final dataset is described by only 15 features\n",
|
119 |
| - "coords_from_information_imbalance = [1, 4, 5, 7, 10, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25]\n", |
| 121 | + "coords_from_information_imbalance = [\n", |
| 122 | + " 1,\n", |
| 123 | + " 4,\n", |
| 124 | + " 5,\n", |
| 125 | + " 7,\n", |
| 126 | + " 10,\n", |
| 127 | + " 12,\n", |
| 128 | + " 13,\n", |
| 129 | + " 14,\n", |
| 130 | + " 15,\n", |
| 131 | + " 16,\n", |
| 132 | + " 17,\n", |
| 133 | + " 18,\n", |
| 134 | + " 19,\n", |
| 135 | + " 24,\n", |
| 136 | + " 25,\n", |
| 137 | + "]\n", |
120 | 138 | "selected_dihedrals = all_dihedrals[:, coords_from_information_imbalance]\n",
|
121 | 139 | "\n",
|
122 | 140 | "print(selected_dihedrals.shape)"
|
|
151 | 169 | ],
|
152 | 170 | "source": [
|
153 | 171 | "# initialise a Data object\n",
|
154 |
| - "d_dihedrals = Data(selected_dihedrals+np.pi, verbose=False)\n", |
| 172 | + "d_dihedrals = Data(selected_dihedrals + np.pi, verbose=False)\n", |
155 | 173 | "# compute distances by setting the correct period\n",
|
156 |
| - "d_dihedrals.compute_distances(maxk=min(selected_dihedrals.shape[0]-1, 10000), period=2.*np.pi)\n", |
| 174 | + "d_dihedrals.compute_distances(\n", |
| 175 | + " maxk=min(selected_dihedrals.shape[0] - 1, 10000), period=2.0 * np.pi\n", |
| 176 | + ")\n", |
157 | 177 | "# estimate the intrinsic dimension\n",
|
158 | 178 | "d_dihedrals.compute_id_2NN()"
|
159 | 179 | ]
|
|
169 | 189 | "source": [
|
170 | 190 | "# ID scaling analysis using two different methods\n",
|
171 | 191 | "ids_2nn, errs_2nn, scales_2nn = d_dihedrals.return_id_scaling_2NN()\n",
|
172 |
| - "ids_gride, errs_gride, scales_gride = d_dihedrals.return_id_scaling_gride(range_max=1024)" |
| 192 | + "ids_gride, errs_gride, scales_gride = d_dihedrals.return_id_scaling_gride(\n", |
| 193 | + " range_max=1024\n", |
| 194 | + ")" |
173 | 195 | ]
|
174 | 196 | },
|
175 | 197 | {
|
|
192 | 214 | }
|
193 | 215 | ],
|
194 | 216 | "source": [
|
195 |
| - "col = 'darkorange'\n", |
| 217 | + "col = \"darkorange\"\n", |
196 | 218 | "plt.plot(scales_2nn, ids_2nn, alpha=0.85)\n",
|
197 |
| - "plt.errorbar(scales_2nn, ids_2nn, errs_2nn, fmt='None')\n", |
198 |
| - "plt.scatter(scales_2nn, ids_2nn, edgecolors='k',s=50,label='2nn decimation')\n", |
| 219 | + "plt.errorbar(scales_2nn, ids_2nn, errs_2nn, fmt=\"None\")\n", |
| 220 | + "plt.scatter(scales_2nn, ids_2nn, edgecolors=\"k\", s=50, label=\"2nn decimation\")\n", |
199 | 221 | "plt.plot(scales_gride, ids_gride, alpha=0.85, color=col)\n",
|
200 |
| - "plt.errorbar(scales_gride, ids_gride, errs_gride, fmt='None',color=col)\n", |
201 |
| - "plt.scatter(scales_gride, ids_gride, edgecolors='k',color=col,s=50,label='2nn gride')\n", |
202 |
| - "plt.xlabel(r'Scale',size=15)\n", |
203 |
| - "plt.ylabel('Estimated ID',size=15)\n", |
| 222 | + "plt.errorbar(scales_gride, ids_gride, errs_gride, fmt=\"None\", color=col)\n", |
| 223 | + "plt.scatter(scales_gride, ids_gride, edgecolors=\"k\", color=col, s=50, label=\"2nn gride\")\n", |
| 224 | + "plt.xlabel(r\"Scale\", size=15)\n", |
| 225 | + "plt.ylabel(\"Estimated ID\", size=15)\n", |
204 | 226 | "plt.xticks(size=15)\n",
|
205 | 227 | "plt.yticks(size=15)\n",
|
206 |
| - "plt.legend(frameon=False,fontsize=14)\n", |
| 228 | + "plt.legend(frameon=False, fontsize=14)\n", |
207 | 229 | "plt.tight_layout()"
|
208 | 230 | ]
|
209 | 231 | },
|
|
227 | 249 | ],
|
228 | 250 | "source": [
|
229 | 251 | "# estimate density via PAk\n",
|
230 |
| - "d_dihedrals.set_id(7.)\n", |
| 252 | + "d_dihedrals.set_id(7.0)\n", |
231 | 253 | "d_dihedrals.compute_density_PAk()"
|
232 | 254 | ]
|
233 | 255 | },
|
|
250 | 272 | ],
|
251 | 273 | "source": [
|
252 | 274 | "# cluster data via Advanced Density Peak\n",
|
253 |
| - "d_dihedrals.compute_clustering_ADP(Z=4.5,halo=False);\n", |
| 275 | + "d_dihedrals.compute_clustering_ADP(Z=4.5, halo=False)\n", |
254 | 276 | "n_clusters = len(d_dihedrals.cluster_centers)\n",
|
255 | 277 | "print(n_clusters)"
|
256 | 278 | ]
|
|
275 | 297 | }
|
276 | 298 | ],
|
277 | 299 | "source": [
|
278 |
| - "pl.get_dendrogram(d_dihedrals, cmap='Set2', logscale=False)" |
| 300 | + "pl.get_dendrogram(d_dihedrals, cmap=\"Set2\", logscale=False)" |
279 | 301 | ]
|
280 | 302 | },
|
281 | 303 | {
|
|
299 | 321 | ],
|
300 | 322 | "source": [
|
301 | 323 | "# Cluster populations\n",
|
302 |
| - "populations = [ len(el) for r_,el in enumerate(d_dihedrals.cluster_indices)]\n", |
| 324 | + "populations = [len(el) for r_, el in enumerate(d_dihedrals.cluster_indices)]\n", |
303 | 325 | "populations"
|
304 | 326 | ]
|
305 | 327 | },
|
|
420 | 442 | }
|
421 | 443 | ],
|
422 | 444 | "source": [
|
423 |
| - "d_distances = Data(heavy_atom_distances,verbose=False)\n", |
424 |
| - "d_distances.compute_distances(maxk=min(heavy_atom_distances.shape[0]-1,10000))\n", |
| 445 | + "d_distances = Data(heavy_atom_distances, verbose=False)\n", |
| 446 | + "d_distances.compute_distances(maxk=min(heavy_atom_distances.shape[0] - 1, 10000))\n", |
425 | 447 | "d_distances.compute_id_2NN()"
|
426 | 448 | ]
|
427 | 449 | },
|
|
434 | 456 | "source": [
|
435 | 457 | "# ID scaling analysis using two different methods\n",
|
436 | 458 | "ids_2nn, errs_2nn, scales_2nn = d_distances.return_id_scaling_2NN()\n",
|
437 |
| - "ids_gride, errs_gride, scales_gride = d_distances.return_id_scaling_gride(range_max=1024)" |
| 459 | + "ids_gride, errs_gride, scales_gride = d_distances.return_id_scaling_gride(\n", |
| 460 | + " range_max=1024\n", |
| 461 | + ")" |
438 | 462 | ]
|
439 | 463 | },
|
440 | 464 | {
|
|
457 | 481 | }
|
458 | 482 | ],
|
459 | 483 | "source": [
|
460 |
| - "col = 'darkorange'\n", |
| 484 | + "col = \"darkorange\"\n", |
461 | 485 | "plt.plot(scales_2nn, ids_2nn, alpha=0.85)\n",
|
462 |
| - "plt.errorbar(scales_2nn, ids_2nn, errs_2nn, fmt='None')\n", |
463 |
| - "plt.scatter(scales_2nn, ids_2nn, edgecolors='k',s=50,label='2nn decimation')\n", |
| 486 | + "plt.errorbar(scales_2nn, ids_2nn, errs_2nn, fmt=\"None\")\n", |
| 487 | + "plt.scatter(scales_2nn, ids_2nn, edgecolors=\"k\", s=50, label=\"2nn decimation\")\n", |
464 | 488 | "plt.plot(scales_gride, ids_gride, alpha=0.85, color=col)\n",
|
465 |
| - "plt.errorbar(scales_gride, ids_gride, errs_gride, fmt='None',color=col)\n", |
466 |
| - "plt.scatter(scales_gride, ids_gride, edgecolors='k',color=col,s=50,label='2nn gride')\n", |
467 |
| - "plt.xlabel(r'Scale',size=15)\n", |
468 |
| - "plt.ylabel('Estimated ID',size=15)\n", |
| 489 | + "plt.errorbar(scales_gride, ids_gride, errs_gride, fmt=\"None\", color=col)\n", |
| 490 | + "plt.scatter(scales_gride, ids_gride, edgecolors=\"k\", color=col, s=50, label=\"2nn gride\")\n", |
| 491 | + "plt.xlabel(r\"Scale\", size=15)\n", |
| 492 | + "plt.ylabel(\"Estimated ID\", size=15)\n", |
469 | 493 | "plt.xticks(size=15)\n",
|
470 | 494 | "plt.yticks(size=15)\n",
|
471 |
| - "plt.legend(frameon=False,fontsize=14)\n", |
| 495 | + "plt.legend(frameon=False, fontsize=14)\n", |
472 | 496 | "plt.tight_layout()"
|
473 | 497 | ]
|
474 | 498 | },
|
|
489 | 513 | ],
|
490 | 514 | "source": [
|
491 | 515 | "# estimate density via PAk\n",
|
492 |
| - "d_distances.set_id(9.)\n", |
| 516 | + "d_distances.set_id(9.0)\n", |
493 | 517 | "d_distances.compute_density_PAk()\n",
|
494 | 518 | "# cluster data via Advanced Density Peak\n",
|
495 |
| - "d_distances.compute_clustering_ADP(Z=3.5,halo=False);\n", |
| 519 | + "d_distances.compute_clustering_ADP(Z=3.5, halo=False)\n", |
496 | 520 | "n_clusters = len(d_dihedrals.cluster_centers)\n",
|
497 | 521 | "print(n_clusters)"
|
498 | 522 | ]
|
|
519 | 543 | }
|
520 | 544 | ],
|
521 | 545 | "source": [
|
522 |
| - "pl.get_dendrogram(d_distances, cmap='Set2', logscale=False)" |
| 546 | + "pl.get_dendrogram(d_distances, cmap=\"Set2\", logscale=False)" |
523 | 547 | ]
|
524 | 548 | },
|
525 | 549 | {
|
|
541 | 565 | ],
|
542 | 566 | "source": [
|
543 | 567 | "# Cluster populations\n",
|
544 |
| - "populations = [ len(el) for r_,el in enumerate(d_distances.cluster_indices)]\n", |
| 568 | + "populations = [len(el) for r_, el in enumerate(d_distances.cluster_indices)]\n", |
545 | 569 | "populations"
|
546 | 570 | ]
|
547 | 571 | },
|
|
606 | 630 | ],
|
607 | 631 | "source": [
|
608 | 632 | "# number of elements in common before permutation\n",
|
609 |
| - "sum(d_distances.cluster_assignment == d_dihedrals.cluster_assignment)/d_dihedrals.N" |
| 633 | + "sum(d_distances.cluster_assignment == d_dihedrals.cluster_assignment) / d_dihedrals.N" |
610 | 634 | ]
|
611 | 635 | },
|
612 | 636 | {
|
|
645 | 669 | ],
|
646 | 670 | "source": [
|
647 | 671 | "# number of elements in common after permutation\n",
|
648 |
| - "sum(distances_cluster_assignments_2 == d_dihedrals.cluster_assignment)/d_dihedrals.N" |
| 672 | + "sum(distances_cluster_assignments_2 == d_dihedrals.cluster_assignment) / d_dihedrals.N" |
649 | 673 | ]
|
650 | 674 | },
|
651 | 675 | {
|
|
0 commit comments