From 943032c54f768805bf8eaf18fd79b0e0a26350f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Wed, 27 Nov 2024 11:17:58 -0700 Subject: [PATCH 1/7] Add function to create bokeh plot of robustica clusters --- tedana/reporting/dynamic_figures.py | 109 ++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py index 99c1a6fae..a2aaae380 100644 --- a/tedana/reporting/dynamic_figures.py +++ b/tedana/reporting/dynamic_figures.py @@ -421,3 +421,112 @@ def _link_figures(fig, comptable_ds, div_content, io_generator): """ fig.js_on_event(events.Tap, _tap_callback(comptable_ds, div_content, io_generator)) return fig + + +def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): + """Plot the clustering results of robustica using Bokeh. + + Parameters + ---------- + cluster_labels : (n_pca_components x n_robust_runs,) : numpy.ndarray + A one dimensional array that has the cluster label of each run. + similarity_t_sne : (n_pca_components x n_robust_runs,2) : numpy.ndarray + An array containing the coordinates of projected data. + io_generator : object + An object containing all the information needed to generate the output. + """ + title = "2D projection of clustered ICA runs using TSNE" + marker_size = 8 + alpha = 0.8 + line_width = 2 + + # Create figure + p = plotting.figure( + title=title, + width=800, + height=600, + tools="pan,box_zoom,wheel_zoom,reset,save", + ) + p.title.text_font_size = "16px" + p.xaxis.axis_label = "x1" + p.yaxis.axis_label = "x2" + + # # Create ColumnDataSource for all points + # source_data = { + # "x": similarity_t_sne[:, 0], + # "y": similarity_t_sne[:, 1], + # "cluster": cluster_labels, + # } + # source = models.ColumnDataSource(source_data) + + # Plot regular clusters + for cluster_id in range(np.max(cluster_labels) + 1): + cluster_mask = cluster_labels == cluster_id + if not np.any(cluster_mask): + continue + + # Get points for this cluster + cluster_points = similarity_t_sne[cluster_mask] + + # Add scatter plot for cluster points + p.circle( + x="x", + y="y", + source=models.ColumnDataSource({"x": cluster_points[:, 0], "y": cluster_points[:, 1]}), + size=marker_size, + alpha=alpha, + line_color="black", + fill_color=None, + line_width=line_width, + legend_label="Clustered runs", + ) + + # Add hull if enough points + if cluster_points.shape[0] > 2: + from scipy.spatial import ConvexHull + + hull = ConvexHull(cluster_points) + centroid = np.mean(cluster_points[hull.vertices], axis=0) + scaled_points = centroid + 1.5 * (cluster_points - centroid) + + # Create hull line segments + xs = [] + ys = [] + for simplex in hull.simplices: + xs.extend([scaled_points[simplex[0], 0], scaled_points[simplex[1], 0], None]) + ys.extend([scaled_points[simplex[0], 1], scaled_points[simplex[1], 1], None]) + + p.line( + x=xs, + y=ys, + line_color="blue", + line_dash="dashed", + line_width=line_width, + legend_label="Cluster's boundary", + ) + + # Plot noise clusters if they exist + if np.min(cluster_labels) == -1: + noise_mask = cluster_labels == -1 + noise_points = similarity_t_sne[noise_mask] + + p.x( + x=noise_points[:, 0], + y=noise_points[:, 1], + size=marker_size * 2, + alpha=0.6, + color="red", + legend_label="Unclustered runs", + ) + + # Configure legend + p.legend.click_policy = "hide" + p.legend.location = "top_right" + + # Save HTML file + # plot_name = f"{io_generator.prefix}clustering_projection_tsne.html" + # plotting.save(p, os.path.join(io_generator.out_dir, "figures", plot_name)) + + breakpoint() + + return p From 658782c4748a87b3c9fd93218585e7c6b8ced8ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Wed, 27 Nov 2024 13:05:13 -0700 Subject: [PATCH 2/7] Pass all robust ica parameters to the plot making function --- tedana/decomposition/ica.py | 6 +++--- tedana/reporting/dynamic_figures.py | 2 ++ tedana/reporting/html_report.py | 8 +++++++- tedana/workflows/tedana.py | 4 ++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index d687db6e0..2667ae789 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -69,7 +69,7 @@ def tedica( ica_method = ica_method.lower() if ica_method == "robustica": - mixing, fixed_seed = r_ica( + mixing, fixed_seed, cluster_labels, signs = r_ica( data, n_components=n_components, fixed_seed=fixed_seed, @@ -87,7 +87,7 @@ def tedica( else: raise ValueError("The selected ICA method is invalid!") - return mixing, fixed_seed + return mixing, fixed_seed, cluster_labels, signs def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): @@ -192,7 +192,7 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): f"decomposition." ) - return mixing, fixed_seed + return mixing, fixed_seed, robust_ica.clustering.labels_, robust_ica.signs_ def f_ica(data, n_components, fixed_seed, maxit, maxrestart): diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py index a2aaae380..f64a5b272 100644 --- a/tedana/reporting/dynamic_figures.py +++ b/tedana/reporting/dynamic_figures.py @@ -451,6 +451,8 @@ def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): p.xaxis.axis_label = "x1" p.yaxis.axis_label = "x2" + breakpoint() + # # Create ColumnDataSource for all points # source_data = { # "x": similarity_t_sne[:, 0], diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py index b07cd8fbe..3d869ead8 100644 --- a/tedana/reporting/html_report.py +++ b/tedana/reporting/html_report.py @@ -231,7 +231,7 @@ def _generate_info_table(info_dict): return info_html -def generate_report(io_generator: OutputGenerator) -> None: +def generate_report(io_generator: OutputGenerator, cluster_labels, similarity_t_sne) -> None: """Generate an HTML report. Parameters @@ -320,6 +320,12 @@ def get_elbow_val(elbow_prefix): ) varexp_pie_plot = df._create_varexp_pie_plt(comptable_cds) + # Create clustering plot + clustering_tsne_plot = df._create_clustering_tsne_plt( + cluster_labels, similarity_t_sne, io_generator + ) + breakpoint() + # link all dynamic figures figs = [kappa_rho_plot, kappa_sorted_plot, rho_sorted_plot, varexp_pie_plot] diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index 739a03210..3985d9270 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -789,7 +789,7 @@ def tedana_workflow( n_restarts = 0 seed = fixed_seed while keep_restarting: - mixing, seed = decomposition.tedica( + mixing, seed, cluster_labels, similarity_t_sne = decomposition.tedica( data_reduced, n_components, seed, @@ -1070,7 +1070,7 @@ def tedana_workflow( ) LGR.info("Generating dynamic report") - reporting.generate_report(io_generator) + reporting.generate_report(io_generator, cluster_labels, similarity_t_sne) LGR.info("Workflow completed") utils.teardown_loggers() From ac9c1ec5e49ab8e4a49adc1a0d678d8fb9eb0504 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Wed, 27 Nov 2024 14:50:52 -0700 Subject: [PATCH 3/7] First working version of bokeh plot --- tedana/decomposition/ica.py | 25 ++++++-- .../data/html/report_body_template.html | 11 +++- tedana/reporting/dynamic_figures.py | 60 +++++++++++-------- tedana/reporting/html_report.py | 18 ++++-- 4 files changed, 76 insertions(+), 38 deletions(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index 2667ae789..e520bbe7c 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -4,8 +4,9 @@ import warnings import numpy as np -from robustica import RobustICA +from robustica import RobustICA, abs_pearson_dist from scipy import stats +from sklearn import manifold from sklearn.decomposition import FastICA from tedana.config import ( @@ -69,7 +70,7 @@ def tedica( ica_method = ica_method.lower() if ica_method == "robustica": - mixing, fixed_seed, cluster_labels, signs = r_ica( + mixing, fixed_seed, c_labels, similarity_t_sne = r_ica( data, n_components=n_components, fixed_seed=fixed_seed, @@ -87,7 +88,7 @@ def tedica( else: raise ValueError("The selected ICA method is invalid!") - return mixing, fixed_seed, cluster_labels, signs + return mixing, fixed_seed, c_labels, similarity_t_sne def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): @@ -192,7 +193,23 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): f"decomposition." ) - return mixing, fixed_seed, robust_ica.clustering.labels_, robust_ica.signs_ + c_labels = robust_ica.clustering.labels_ + + perplexity = min(robust_ica.S_all.shape[1] - 1, 80) + + perplexity = perplexity - 1 if perplexity < 81 else 80 + t_sne = manifold.TSNE( + n_components=2, + perplexity=perplexity, + init="random", + max_iter=2500, + random_state=10, + ) + + p_dissimilarity = abs_pearson_dist(robust_ica.S_all) + similarity_t_sne = t_sne.fit_transform(p_dissimilarity) + + return mixing, fixed_seed, c_labels, similarity_t_sne def f_ica(data, n_components, fixed_seed, maxit, maxrestart): diff --git a/tedana/reporting/data/html/report_body_template.html b/tedana/reporting/data/html/report_body_template.html index 65c68edf2..888244369 100644 --- a/tedana/reporting/data/html/report_body_template.html +++ b/tedana/reporting/data/html/report_body_template.html @@ -39,9 +39,6 @@ .carpet-plots { float: left; - } - - .carpet-plots { margin-left: 5%; margin-right: 5%; margin-bottom: 100px; @@ -64,6 +61,13 @@ float: left; } + .tsne-plots { + float: left; + margin-left: 5%; + margin-right: 5%; + margin-bottom: 100px; + } + button { margin-right: 15px; width: auto; @@ -209,6 +213,7 @@

T2* and S0 model fit (RMSE). (Scaled between 2nd and 98th percentiles)

+$tsne

Info

$info diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py index f64a5b272..e3ba8502b 100644 --- a/tedana/reporting/dynamic_figures.py +++ b/tedana/reporting/dynamic_figures.py @@ -440,26 +440,15 @@ def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): alpha = 0.8 line_width = 2 - # Create figure + # First create the figure without the hover tool p = plotting.figure( title=title, width=800, height=600, - tools="pan,box_zoom,wheel_zoom,reset,save", + tools=["pan", "box_zoom", "wheel_zoom", "reset", "save"], # No hover tool here ) - p.title.text_font_size = "16px" - p.xaxis.axis_label = "x1" - p.yaxis.axis_label = "x2" - breakpoint() - - # # Create ColumnDataSource for all points - # source_data = { - # "x": similarity_t_sne[:, 0], - # "y": similarity_t_sne[:, 1], - # "cluster": cluster_labels, - # } - # source = models.ColumnDataSource(source_data) + point_renderers = [] # List to store point renderers # Plot regular clusters for cluster_id in range(np.max(cluster_labels) + 1): @@ -470,18 +459,26 @@ def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): # Get points for this cluster cluster_points = similarity_t_sne[cluster_mask] - # Add scatter plot for cluster points - p.circle( + # Add scatter plot for cluster points with hover info + circle_renderer = p.circle( x="x", y="y", - source=models.ColumnDataSource({"x": cluster_points[:, 0], "y": cluster_points[:, 1]}), + source=models.ColumnDataSource( + { + "x": cluster_points[:, 0], + "y": cluster_points[:, 1], + "cluster": [f"Cluster {cluster_id}"] * len(cluster_points), + } + ), size=marker_size, alpha=alpha, line_color="black", fill_color=None, line_width=line_width, legend_label="Clustered runs", + name="points", ) + point_renderers.append(circle_renderer) # Add hull if enough points if cluster_points.shape[0] > 2: @@ -498,6 +495,7 @@ def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): xs.extend([scaled_points[simplex[0], 0], scaled_points[simplex[1], 0], None]) ys.extend([scaled_points[simplex[0], 1], scaled_points[simplex[1], 1], None]) + # Add line without hover tooltips p.line( x=xs, y=ys, @@ -512,23 +510,33 @@ def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): noise_mask = cluster_labels == -1 noise_points = similarity_t_sne[noise_mask] - p.x( - x=noise_points[:, 0], - y=noise_points[:, 1], + # Add noise points with hover tooltips + x_renderer = p.x( + x="x", + y="y", size=marker_size * 2, alpha=0.6, color="red", legend_label="Unclustered runs", + source=models.ColumnDataSource( + { + "x": noise_points[:, 0], + "y": noise_points[:, 1], + "cluster": ["Unclustered"] * len(noise_points), + } + ), ) + point_renderers.append(x_renderer) + + # Add hover tool after creating all renderers, specifically for points + hover_tool = models.HoverTool( + tooltips=[("Cluster", "@cluster")], + renderers=point_renderers, # Only apply to stored point renderers + ) + p.add_tools(hover_tool) # Configure legend p.legend.click_policy = "hide" p.legend.location = "top_right" - # Save HTML file - # plot_name = f"{io_generator.prefix}clustering_projection_tsne.html" - # plotting.save(p, os.path.join(io_generator.out_dir, "figures", plot_name)) - - breakpoint() - return p diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py index 3d869ead8..0aaf66801 100644 --- a/tedana/reporting/html_report.py +++ b/tedana/reporting/html_report.py @@ -113,7 +113,9 @@ def _generate_buttons(out_dir, io_generator): return buttons_html -def _update_template_bokeh(bokeh_id, info_table, about, prefix, references, bokeh_js, buttons): +def _update_template_bokeh( + bokeh_id, info_table, about, prefix, references, bokeh_js, buttons, tsne +): """ Populate a report with content. @@ -133,6 +135,8 @@ def _update_template_bokeh(bokeh_id, info_table, about, prefix, references, boke Javascript created by bokeh.embed.components buttons : str HTML div created by _generate_buttons() + tsne : str + HTML div created by _create_clustering_tsne_plt() Returns ------- @@ -181,6 +185,7 @@ def _update_template_bokeh(bokeh_id, info_table, about, prefix, references, boke references=references, javascript=bokeh_js, buttons=buttons, + tsne=tsne, ) return body @@ -321,10 +326,12 @@ def get_elbow_val(elbow_prefix): varexp_pie_plot = df._create_varexp_pie_plt(comptable_cds) # Create clustering plot - clustering_tsne_plot = df._create_clustering_tsne_plt( - cluster_labels, similarity_t_sne, io_generator - ) - breakpoint() + if cluster_labels is not None: + clustering_tsne_plot = df._create_clustering_tsne_plt( + cluster_labels, similarity_t_sne, io_generator + ) + tsne_script, tsne_div = embed.components(clustering_tsne_plot) + tsne_html = f"{tsne_script}

Robust ICA component clustering

{tsne_div}
" # link all dynamic figures figs = [kappa_rho_plot, kappa_sorted_plot, rho_sorted_plot, varexp_pie_plot] @@ -377,6 +384,7 @@ def get_elbow_val(elbow_prefix): prefix=io_generator.prefix, bokeh_js=kr_script, buttons=buttons_html, + tsne=tsne_html, ) html = _save_as_html(body) with open(opj(io_generator.out_dir, f"{io_generator.prefix}tedana_report.html"), "wb") as f: From ac969fd60197c0c8c72b18992297b82437e4e03e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Thu, 23 Jan 2025 10:03:33 +0000 Subject: [PATCH 4/7] Refactor clustering TSNE plotting function by removing unused parameter and updating HTML report generation --- tedana/reporting/dynamic_figures.py | 4 +--- tedana/reporting/html_report.py | 5 ++++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tedana/reporting/dynamic_figures.py b/tedana/reporting/dynamic_figures.py index e3ba8502b..e6cc19cc6 100644 --- a/tedana/reporting/dynamic_figures.py +++ b/tedana/reporting/dynamic_figures.py @@ -423,7 +423,7 @@ def _link_figures(fig, comptable_ds, div_content, io_generator): return fig -def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): +def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne): """Plot the clustering results of robustica using Bokeh. Parameters @@ -432,8 +432,6 @@ def _create_clustering_tsne_plt(cluster_labels, similarity_t_sne, io_generator): A one dimensional array that has the cluster label of each run. similarity_t_sne : (n_pca_components x n_robust_runs,2) : numpy.ndarray An array containing the coordinates of projected data. - io_generator : object - An object containing all the information needed to generate the output. """ title = "2D projection of clustered ICA runs using TSNE" marker_size = 8 diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py index 0aaf66801..3ba45aa98 100644 --- a/tedana/reporting/html_report.py +++ b/tedana/reporting/html_report.py @@ -331,7 +331,10 @@ def get_elbow_val(elbow_prefix): cluster_labels, similarity_t_sne, io_generator ) tsne_script, tsne_div = embed.components(clustering_tsne_plot) - tsne_html = f"{tsne_script}

Robust ICA component clustering

{tsne_div}
" + tsne_html = f"{tsne_script}" + tsne_html += ( + f"

Robust ICA component clustering

{tsne_div}
" + ) # link all dynamic figures figs = [kappa_rho_plot, kappa_sorted_plot, rho_sorted_plot, varexp_pie_plot] From 9ad3f0977609590ffe9334df25d90ae854086382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Thu, 23 Jan 2025 10:22:44 +0000 Subject: [PATCH 5/7] Trigger tests From 87bc63d8e01e125f5b47570c18f9223582ae2ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Thu, 23 Jan 2025 10:53:17 +0000 Subject: [PATCH 6/7] Rename parameter 'max_iter' to 'n_iter' in r_ica function for consistency --- tedana/decomposition/ica.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index e520bbe7c..40db7ce98 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -202,7 +202,7 @@ def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): n_components=2, perplexity=perplexity, init="random", - max_iter=2500, + n_iter=2500, random_state=10, ) From dd8d2130cd675da294b24cb6d50a6d9584fd79ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Thu, 23 Jan 2025 13:56:31 +0000 Subject: [PATCH 7/7] Refactor clustering TSNE plotting function by removing unused parameter from report generation --- tedana/reporting/html_report.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tedana/reporting/html_report.py b/tedana/reporting/html_report.py index 3ba45aa98..0ed20681d 100644 --- a/tedana/reporting/html_report.py +++ b/tedana/reporting/html_report.py @@ -327,9 +327,7 @@ def get_elbow_val(elbow_prefix): # Create clustering plot if cluster_labels is not None: - clustering_tsne_plot = df._create_clustering_tsne_plt( - cluster_labels, similarity_t_sne, io_generator - ) + clustering_tsne_plot = df._create_clustering_tsne_plt(cluster_labels, similarity_t_sne) tsne_script, tsne_div = embed.components(clustering_tsne_plot) tsne_html = f"{tsne_script}" tsne_html += (