From be08c60bc5fb473f376c0532c9e7b685f4020954 Mon Sep 17 00:00:00 2001 From: wreise Date: Mon, 13 Feb 2023 12:55:30 +0100 Subject: [PATCH 1/4] Correct bracket for exponential --- src/python/test/test_representations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index d3f8d8a03c..552052db9b 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -93,7 +93,7 @@ def test_kernel_from_distance(): kernelClass, kernelParams, tolerance = kernel_dict[kernelName] f1 = kernelClass.fit_transform(l1) d1 = pairwise_persistence_diagram_distances(l1, metric=kernelName, **kernelParams) - assert np.exp(-d1/kernelClass.bandwidth == pytest.approx(f1, **tolerance)) + assert np.exp(-d1/kernelClass.bandwidth) == pytest.approx(f1, **tolerance) def test_kernel_distance_consistency(): l1, l2 = _n_diags(9), _n_diags(11) From 7238d020ab15783c3c1548e732dc2b4b9a20d92c Mon Sep 17 00:00:00 2001 From: wreise Date: Wed, 15 Feb 2023 23:20:15 +0100 Subject: [PATCH 2/4] Add kernel_approx tests; Wasserstein with pot & hera comparisons --- src/python/test/test_representations.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 552052db9b..01435286d8 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -6,6 +6,7 @@ import random from sklearn.cluster import KMeans +from sklearn.kernel_approximation import RBFSampler # Vectorization from gudhi.representations import (Landscape, Silhouette, BettiCurve, ComplexPolynomial,\ @@ -48,9 +49,12 @@ def _n_diags(n): "bottleneck": (BottleneckDistance(epsilon=0.00001), dict(e=0.00001), dict(abs=1e-5)), - "wasserstein": (WassersteinDistance(order=2, internal_p=2, n_jobs=4), + "pot_wasserstein": (WassersteinDistance(order=2, mode="pot", internal_p=2, n_jobs=4), dict(order=2, internal_p=2, n_jobs=4), dict(rel=1e-3)), + "hera_wasserstein": (WassersteinDistance(order=2, mode="hera", delta=0.001, internal_p=2, n_jobs=4), + dict(order=2, internal_p=2, n_jobs=4, delta=0.001,), + dict(rel=1e-3)), "sliced_wasserstein": (SlicedWassersteinDistance(num_directions=100, n_jobs=4), dict(num_directions=100), dict(rel=1e-3)), @@ -87,6 +91,8 @@ def test_distance_transform_consistency(): dict(bandwidth=4.), dict(rel=1e-3)), } + +kernel_approx = RBFSampler(gamma=1./2, n_components=100000) def test_kernel_from_distance(): l1, l2 = _n_diags(9), _n_diags(11) for kernelName in ["sliced_wasserstein", "persistence_fisher"]: @@ -102,6 +108,10 @@ def test_kernel_distance_consistency(): f2 = kernelClass.transform(l2) f12 = np.array([[kernelClass(l1_, l2_) for l1_ in l1] for l2_ in l2]) assert f12 == pytest.approx(f2, **tolerance) + kernelClass.kernel_approx_ = kernel_approx + _ = kernelClass.fit(l1) + f2_approx = kernelClass.transform(l2) + assert f2_approx == pytest.approx(f2, **tolerance) def test_sliced_wasserstein_distance_value(): diag1 = np.array([[0., 1.], [0., 2.]]) From c6fd175cb132f4a162c91c9cb7f01f7c84bb4955 Mon Sep 17 00:00:00 2001 From: wreise Date: Mon, 6 Mar 2023 22:01:22 +0100 Subject: [PATCH 3/4] Correct kernel approx tests after Marcs' comments --- src/python/test/test_representations.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 01435286d8..a1db15882e 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -79,39 +79,39 @@ def test_distance_transform_consistency(): kernel_dict = { "sliced_wasserstein": (SlicedWassersteinKernel(num_directions=10, bandwidth=4., n_jobs=4), - dict(num_directions=10), dict(rel=1e-3)), + dict(num_directions=10), dict(rel=1e-3), dict(rel=1e-2)), "persistence_fisher": (PersistenceFisherKernel(bandwidth_fisher=3., bandwidth=1.), dict(bandwidth=3.), # corresponds to bandwidth_fisher in the kernel class - dict(rel=1e-3)), + dict(rel=1e-3), dict(rel=1e-2)), "persistence_weighted_gaussian": (PersistenceWeightedGaussianKernel(bandwidth=4., weight=lambda x: x[1]-x[0]), dict(bandwidth=4., weight=lambda x: x[1]-x[0]), - dict(rel=1e-3)), + dict(rel=1e-3), dict(rel=1e-2)), "persistence_scale_space": (PersistenceScaleSpaceKernel(bandwidth=4.), dict(bandwidth=4.), - dict(rel=1e-3)), + dict(rel=1e-3), dict(rel=1e-2)), } -kernel_approx = RBFSampler(gamma=1./2, n_components=100000) def test_kernel_from_distance(): l1, l2 = _n_diags(9), _n_diags(11) for kernelName in ["sliced_wasserstein", "persistence_fisher"]: - kernelClass, kernelParams, tolerance = kernel_dict[kernelName] + kernelClass, kernelParams, tolerance, _ = kernel_dict[kernelName] f1 = kernelClass.fit_transform(l1) d1 = pairwise_persistence_diagram_distances(l1, metric=kernelName, **kernelParams) assert np.exp(-d1/kernelClass.bandwidth) == pytest.approx(f1, **tolerance) def test_kernel_distance_consistency(): l1, l2 = _n_diags(9), _n_diags(11) - for kernelName, (kernelClass, kernelParams, tolerance) in kernel_dict.items(): + for kernelName, (kernelClass, kernelParams, tolerance, tolerance_approximate_kernel) in kernel_dict.items(): _ = kernelClass.fit(l1) f2 = kernelClass.transform(l2) f12 = np.array([[kernelClass(l1_, l2_) for l1_ in l1] for l2_ in l2]) assert f12 == pytest.approx(f2, **tolerance) - kernelClass.kernel_approx_ = kernel_approx + kernel_approx = RBFSampler(gamma=0.5, n_components=1000).fit(np.array([[0., 2.]])) + kernelClass.kernel_approx = kernel_approx _ = kernelClass.fit(l1) f2_approx = kernelClass.transform(l2) - assert f2_approx == pytest.approx(f2, **tolerance) + assert f2_approx == pytest.approx(f2, **tolerance_approximate_kernel) def test_sliced_wasserstein_distance_value(): diag1 = np.array([[0., 1.], [0., 2.]]) From d09218cc83f0daa250e4b7e16051ba297c502ffc Mon Sep 17 00:00:00 2001 From: wreise Date: Mon, 16 Oct 2023 17:51:16 +0200 Subject: [PATCH 4/4] Fix tests --- src/python/test/test_representations.py | 32 +++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index 987c5beea2..6b68d7d2b8 100644 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -79,35 +79,43 @@ def test_distance_transform_consistency(): kernel_dict = { "sliced_wasserstein": (SlicedWassersteinKernel(num_directions=10, bandwidth=4., n_jobs=4), - dict(num_directions=10), dict(rel=1e-3), dict(rel=1e-2)), + dict(num_directions=10)), "persistence_fisher": (PersistenceFisherKernel(bandwidth_fisher=3., bandwidth=1.), - dict(bandwidth=3.), # corresponds to bandwidth_fisher in the kernel class - dict(rel=1e-3), dict(rel=1e-2)), - "persistence_weighted_gaussian": (PersistenceWeightedGaussianKernel(bandwidth=4., + dict(bandwidth=3.),), # corresponds to bandwidth_fisher in the kernel class + "persistence_weighted_gaussian": (PersistenceWeightedGaussianKernel(bandwidth=2., weight=lambda x: x[1]-x[0]), - dict(bandwidth=4., weight=lambda x: x[1]-x[0]), - dict(rel=1e-3), dict(rel=1e-2)), - "persistence_scale_space": (PersistenceScaleSpaceKernel(bandwidth=4.), - dict(bandwidth=4.), - dict(rel=1e-3), dict(rel=1e-2)), + dict(bandwidth=2., weight=lambda x: x[1]-x[0]),), + "persistence_scale_space": (PersistenceScaleSpaceKernel(bandwidth=3.), + dict(bandwidth=3.)), } def test_kernel_from_distance(): l1, l2 = _n_diags(9), _n_diags(11) + tolerance = dict(rel=1e-3) for kernelName in ["sliced_wasserstein", "persistence_fisher"]: - kernelClass, kernelParams, tolerance, _ = kernel_dict[kernelName] + kernelClass, kernelParams = kernel_dict[kernelName] f1 = kernelClass.fit_transform(l1) d1 = pairwise_persistence_diagram_distances(l1, metric=kernelName, **kernelParams) assert np.exp(-d1/kernelClass.bandwidth) == pytest.approx(f1, **tolerance) def test_kernel_distance_consistency(): l1, l2 = _n_diags(9), _n_diags(11) - for kernelName, (kernelClass, kernelParams, tolerance, tolerance_approximate_kernel) in kernel_dict.items(): + tolerance = dict(rel=1e-3) + for kernelName, (kernelClass, kernelParams) in kernel_dict.items(): _ = kernelClass.fit(l1) f2 = kernelClass.transform(l2) f12 = np.array([[kernelClass(l1_, l2_) for l1_ in l1] for l2_ in l2]) assert f12 == pytest.approx(f2, **tolerance) - kernel_approx = RBFSampler(gamma=0.5, n_components=1000).fit(np.array([[0., 2.]])) + +def test_kernel_approximation(): + l1, l2 = _n_diags(3), _n_diags(3) + tolerance_approximate_kernel = dict(rel=1e-1) + for kernelName in ["persistence_weighted_gaussian", "persistence_scale_space"]: + kernelClass, kernelParams = kernel_dict[kernelName] + _ = kernelClass.fit(l1) + f2 = kernelClass.transform(l2) + gamma = 0.5*1./(kernelParams["bandwidth"]**2) + kernel_approx = RBFSampler(gamma=gamma, n_components=1000).fit(np.array([[0., 2.]])) kernelClass.kernel_approx = kernel_approx _ = kernelClass.fit(l1) f2_approx = kernelClass.transform(l2)