diff --git a/.github/workflows/build-paper-pdf.yaml b/.github/workflows/build-paper-pdf.yaml
new file mode 100644
index 0000000..612bebc
--- /dev/null
+++ b/.github/workflows/build-paper-pdf.yaml
@@ -0,0 +1,37 @@
+name: Build paper
+
+on:
+ push:
+ paths:
+ - paper/**
+
+jobs:
+ paper:
+ runs-on: ubuntu-latest
+ name: Build Paper PDF
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Build draft PDF
+ uses: openjournals/openjournals-draft-action@master
+ with:
+ journal: joss
+ # This should be the path to the paper within your repo.
+ paper-path: paper/paper.md
+ - name: Build supplementary PDF
+ uses: docker://pandoc/latex:2.9
+ with:
+      args: >- # allows you to break the string into multiple lines
+ --standalone
+ --output=paper/supplementary.pdf
+ --bibliography=paper/references.bib
+ --resource-path=paper/
+ paper/supplementary.md
+ - name: Upload
+ uses: actions/upload-artifact@v4
+ with:
+ name: paper-pdf
+ # This is the output path where Pandoc will write the compiled
+ # PDF. Note, this should be the same directory as the input
+ # paper.md
+ path: paper/*.pdf
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index da37ea3..d713de6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,8 +1,8 @@
-# Based on:
+# Based on:
# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
-name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+name: Release
-on:
+on:
push:
branches:
- main
@@ -14,9 +14,9 @@ jobs:
name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up Python 3.10
- uses: actions/setup-python@v3
+ uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install pypa/build
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 10f1696..6246014 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,7 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-name: Lint and test code, test documentation build
+name: Test
on:
push:
@@ -35,7 +35,7 @@ jobs:
# see .flake8 config file for selected/ignored rules.
# warnings can be found in the action logs
- docs:
+ docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@@ -55,21 +55,26 @@ jobs:
test:
strategy:
matrix:
- python-version:
+ python-version:
- "3.9"
- "3.10"
- "3.11"
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Setup Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: pip
cache-dependency-path: setup.cfg
- name: Setup R
uses: r-lib/actions/setup-r@v2
+ - name: Install R dependencies
+ run: |
+        # 2024-05-14: loe is not available from CRAN, so we fall back to the archive.
+ wget https://cran.r-project.org/src/contrib/Archive/loe/loe_1.1.tar.gz
+ R CMD INSTALL ./loe_1.1.tar.gz
- name: Install package with dependencies
run: |
python3 -m pip install --upgrade pip
@@ -79,10 +84,9 @@ jobs:
run: |
pytest cblearn --cov=cblearn --cov-report=xml --remote-data
- name: Upload coverage to Codecov
- uses: codecov/codecov-action@v1
+ uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
-
diff --git a/README.md b/README.md
index 699ad2b..3160287 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,15 @@
-# cblearn
+<!-- header image markup omitted -->
## Comparison-based Machine Learning in Python
[](https://pypi.python.org/pypi/cblearn)
[](https://cblearn.readthedocs.io/en/stable/?badge=stable)
[](https://github.com/cblearn/cblearn/actions/workflows/test.yml)
[](https://codecov.io/gh/cblearn/cblearn)
-Comparison-based Learning algorithms are the Machine Learning algorithms to use when training data contains similarity comparisons ("A and B are more similar than C and D") instead of data points.
-
-Triplet comparisons from human observers help model the perceived similarity of objects.
-These human triplets are collected in studies, asking questions like
-"Which of the following bands is most similar to Queen?" or
-"Which color appears most similar to the reference?".
+Comparison-based learning methods are machine learning algorithms that use similarity comparisons ("A and B are more similar than C and D") instead of featurized data.
-This library provides an easy-to-use interface for comparison-based learning algorithms.
-It plays hand-in-hand with scikit-learn:
```python
from sklearn.datasets import load_iris
@@ -36,51 +32,25 @@ embedding = estimator.fit_transform(triplets)
print(f"The embedding has shape {embedding.shape}.")
```
-Please try the [Examples](https://cblearn.readthedocs.io/en/stable/generated_examples/index.html).
-
## Getting Started
-Install cblearn as described [here](https://cblearn.readthedocs.io/en/stable/install.html) and try the [examples](https://cblearn.readthedocs.io/en/stable/generated_examples/index.html).
-
-Find a theoretical introduction to comparison-based learning, the datatypes,
-algorithms, and datasets in the [User Guide](https://cblearn.readthedocs.io/en/stable/user_guide/index.html).
-
-## Features
-
-### Datasets
-
-*cblearn* provides utility methods to simplify the loading and conversion
-of your comparison datasets. In addition, some functions download and load multiple real-world comparisons.
+* [Installation & Quickstart](https://cblearn.readthedocs.io/en/stable/getting_started.html)
+* [Examples](https://cblearn.readthedocs.io/en/stable/generated_examples/index.html).
+* [User Guide](https://cblearn.readthedocs.io/en/stable/user_guide/index.html).
-| Dataset | Query | #Object | #Response | #Triplet |
-| --- | --- | ---:| ---:| ---:|
-| Vogue Cover | Odd-out Triplet | 60 | 1,107 | 2,214 |
-| Nature Scene | Odd-out Triplet | 120 | 3,355 | 6,710 |
-| Car | Most-Central Triplet | 60 | 7,097 | 14,194 |
-| Material | Standard Triplet | 100 | 104,692 |104,692 |
-| Food | Standard Triplet | 100 | 190,376 |190,376 |
-| Musician | Standard Triplet | 413 | 224,792 |224,792 |
-| Things Image Testset | Odd-out Triplet | 1,854 | 146,012 | 292,024 |
-| ImageNet Images v0.1 | Rank 2 from 8 | 1,000 | 25,273 | 328,549 |
-| ImageNet Images v0.2 | Rank 2 from 8 | 50,000 | 384,277 | 5M |
-
-
-### Embedding Algorithms
-
-| Algorithm | Default | Pytorch (GPU) | Reference Wrapper |
-| --------------------------- | :---: | :-----------: | :---------------: |
-| Crowd Kernel Learning (CKL) | X | X | |
-| FORTE | | X | |
-| GNMDS | X | X | |
-| Maximum-Likelihood Difference Scaling (MLDS) | X | | [MLDS (R)](https://cran.r-project.org/web/packages/MLDS/index.html)|
-| Soft Ordinal Embedding (SOE) | X | X | [loe (R)](https://cran.r-project.org/web/packages/loe/index.html) |
-| Stochastic Triplet Embedding (STE/t-STE) | X | X | |
## Contribute
We are happy about your bug reports, questions or suggestions as Github Issues and code or documentation contributions as Github Pull Requests.
Please see our [Contributor Guide](https://cblearn.readthedocs.io/en/stable/contributor_guide/index.html).
+## Related packages
+
+There are more Python packages for comparison-based learning:
+
+- [metric-learn](http://contrib.scikit-learn.org/metric-learn) is a collection of algorithms for metric learning. The *weakly supervised* algorithms learn from triplets and quadruplets.
+- [salmon](https://docs.stsievert.com/salmon/) is a package to collect triplets efficiently in crowd-sourced experiments. To this end, it implements ordinal embedding algorithms and sampling strategies to actively query the most informative comparisons.
+
## Authors and Acknowledgement
*cblearn* was initiated by current and former members of the [Theory of Machine Learning group](http://www.tml.cs.uni-tuebingen.de/index.php) of Prof. Dr. Ulrike von Luxburg at the University of Tübingen.
The leading developer is [David-Elias Künstle](http://www.tml.cs.uni-tuebingen.de/team/kuenstle/index.php).
diff --git a/cblearn/datasets/_food_similarity.py b/cblearn/datasets/_food_similarity.py
index e9d6628..a8e31ed 100644
--- a/cblearn/datasets/_food_similarity.py
+++ b/cblearn/datasets/_food_similarity.py
@@ -27,7 +27,7 @@ def fetch_food_similarity(data_home: Optional[os.PathLike] = None, download_if_m
.. warning::
This function downloads the file without verifying the ssl signature to circumvent an outdated certificate of the dataset hosts.
However, after downloading the function verifies the file checksum before loading the file to minimize the risk of man-in-the-middle attacks.
-
+
=================== =====================
Triplets 190376
Objects 100
@@ -83,12 +83,14 @@ def fetch_food_similarity(data_home: Optional[os.PathLike] = None, download_if_m
archive_path = _base._fetch_remote(ARCHIVE, dirname=data_home)
finally:
ssl._create_default_https_context = ssl_default
-
+
with zipfile.ZipFile(archive_path) as zf:
with zf.open('food100-dataset/all-triplets.csv', 'r') as f:
triplets = np.loadtxt(f, dtype=str, delimiter=';')
+ triplets = np.char.strip(triplets) # trim whitespace
- image_names = np.asarray([name[len('food100-dataset/'):] for name in zf.namelist()
+ image_names = np.asarray([name[len('food100-dataset/'):]
+ for name in zf.namelist()
if name.startswith('food100-dataset/images/')
and name.endswith('.jpg')])
diff --git a/cblearn/datasets/_musician_similarity.py b/cblearn/datasets/_musician_similarity.py
index 75f2e31..17a19ec 100644
--- a/cblearn/datasets/_musician_similarity.py
+++ b/cblearn/datasets/_musician_similarity.py
@@ -20,15 +20,21 @@
def fetch_musician_similarity(data_home: Optional[os.PathLike] = None, download_if_missing: bool = True,
shuffle: bool = True, random_state: Optional[np.random.RandomState] = None,
- return_triplets: bool = False) -> Union[Bunch, np.ndarray]:
+ return_triplets: bool = False,
+ valid_triplets: bool = True) -> Union[Bunch, np.ndarray]:
""" Load the MusicSeer musician similarity dataset (triplets).
=================== =====================
- Triplets 131.970
+ Triplets 118.263
Objects (Artists) 448
Dimensionality unknown
=================== =====================
+ .. warning::
+        This dataset contains triplets whose indices are not necessarily distinct,
+        i.e. for some triplets (i, j, k), i==j, j==k, or i==k is possible.
+        By default, this function filters out such triplets; set `valid_triplets=False` to disable the filtering.
+
See :ref:`musician_similarity_dataset` for a detailed description.
Args:
@@ -42,6 +48,8 @@ def fetch_musician_similarity(data_home: Optional[os.PathLike] = None, download_
Initialization for shuffle random generator
return_triplets : boolean, default=False.
If True, returns numpy array instead of a Bunch object.
+ valid_triplets: boolean, default=True.
+            If True, only valid triplets are returned, i.e. triplets whose indices are pairwise distinct (i != j, i != k, j != k).
Returns:
dataset : :class:`~sklearn.utils.Bunch`
@@ -102,6 +110,11 @@ def fetch_musician_similarity(data_home: Optional[os.PathLike] = None, download_
triplet_filter = musicians_data['other'] != '' # remove bi-tuples.
triplet_ids = np.c_[musicians_data['target'], musicians_data['chosen'], musicians_data['other']]
+ if valid_triplets:
+ triplet_filter = (triplet_filter
+ & (triplet_ids[:, 0] != triplet_ids[:, 1])
+ & (triplet_ids[:, 1] != triplet_ids[:, 2])
+ & (triplet_ids[:, 0] != triplet_ids[:, 2]))
triplet_ids = triplet_ids[triplet_filter].astype(int)
all_ids, triplets = np.unique(triplet_ids, return_inverse=True)
diff --git a/cblearn/datasets/_triplet_response.py b/cblearn/datasets/_triplet_response.py
index 2cfd736..6d020c4 100644
--- a/cblearn/datasets/_triplet_response.py
+++ b/cblearn/datasets/_triplet_response.py
@@ -9,6 +9,17 @@
from cblearn.datasets._datatypes import NoiseTarget, Distance
+def _count_unique_items(query):
+ """ Count unique items per row in a 2D array.
+
+    An efficient approach even for a large number of rows
+    with integer items:
+ https://stackoverflow.com/a/48473125
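+
+    A doctest sketch (values are illustrative):
+
+    >>> _count_unique_items(np.array([[0, 1, 2], [0, 0, 2]]))
+    array([3, 2])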
+ """
+ sorted_query = np.sort(query, axis=1)
+ return (sorted_query[:, 1:] != sorted_query[:, :-1]).sum(axis=1) + 1
+
+
def noisy_triplet_response(triplets: utils.Query, embedding: np.ndarray, result_format: Optional[str] = None,
noise: Union[None, str, Callable] = None, noise_options: Dict = {},
noise_target: Union[str, NoiseTarget] = 'differences',
@@ -63,6 +74,14 @@ def noisy_triplet_response(triplets: utils.Query, embedding: np.ndarray, result_
result_format = utils.check_format(result_format, triplets, None)
triplets: np.ndarray = utils.check_query(triplets, result_format=utils.QueryFormat.LIST)
embedding = check_array(embedding)
+ if triplets.shape[1] != 3:
+ raise ValueError("Triplets require 3 columns.")
+ if (triplets < 0).any() or (triplets >= embedding.shape[0]).any():
+ raise ValueError("Triplet indices must be within the range of the embedding.")
+ non_unique_rows = _count_unique_items(triplets) != 3
+ if (non_unique_rows).any():
+ raise ValueError(f"Triplets must contain unique indices, got {triplets[non_unique_rows]}.")
+
if isinstance(noise, str):
random_state = check_random_state(random_state)
noise_fun: Callable = getattr(random_state, noise)
diff --git a/cblearn/datasets/descr/car_similarity.rst b/cblearn/datasets/descr/car_similarity.rst
index 84c2685..738270c 100644
--- a/cblearn/datasets/descr/car_similarity.rst
+++ b/cblearn/datasets/descr/car_similarity.rst
@@ -10,7 +10,8 @@ The people chose one car of three, such that the following statement is true:
All images were found on Wikimedia Commons and are assigned to one of four classes:
ORDINARY CARS, SPORTS CARS, OFF-ROAD/SPORT UTILITY VEHICLES, and OUTLIERS.
-The corresponding car images are available with the _`full dataset`.
+The corresponding car images are available as part of the `full dataset`_.
+
.. _full dataset: http://www.tml.cs.uni-tuebingen.de/team/luxburg/code_and_data/index.php
**Data Set Characteristics:**
diff --git a/cblearn/datasets/tests/test_food_similarity.py b/cblearn/datasets/tests/test_food_similarity.py
index c02325a..9edf1a1 100644
--- a/cblearn/datasets/tests/test_food_similarity.py
+++ b/cblearn/datasets/tests/test_food_similarity.py
@@ -11,6 +11,7 @@ def test_fetch_food(tmp_path):
assert bunch.data.shape == (190376, 3)
assert bunch.image_names.shape == (100, )
+ assert (bunch.data[:, 1] != bunch.data[:, 2]).all(), "Something went wrong during parsing"
assert bunch.image_names[bunch.data[0, 0]] == 'images/214649bfd7ea489b8daf588e6fed45aa.jpg'
triplets = fetch_food_similarity(data_home=data_home, shuffle=False, return_triplets=True)
diff --git a/cblearn/datasets/tests/test_musician_similarity.py b/cblearn/datasets/tests/test_musician_similarity.py
index b0b866e..2182ba7 100644
--- a/cblearn/datasets/tests/test_musician_similarity.py
+++ b/cblearn/datasets/tests/test_musician_similarity.py
@@ -9,10 +9,10 @@ def test_fetch_musician_similarity(tmp_path):
data_home = tmp_path / 'cblearn_datasets'
bunch = fetch_musician_similarity(data_home=data_home, shuffle=False)
- assert bunch.data.shape == (131_970, 3)
- assert bunch.judgement_id.shape == (131_970, )
- assert bunch.user.shape == (131_970, )
- assert bunch.survey_or_game.shape == (131_970, )
+ assert bunch.data.shape == (118_263, 3)
+ assert bunch.judgement_id.shape == (118_263, )
+ assert bunch.user.shape == (118_263, )
+ assert bunch.survey_or_game.shape == (118_263, )
assert bunch.artist_name.shape == (448, )
assert bunch.artist_id.shape == (448, )
assert bunch.artist_name[bunch.data][0, 0] == 'queen'
diff --git a/cblearn/datasets/tests/test_triplet_response.py b/cblearn/datasets/tests/test_triplet_response.py
new file mode 100644
index 0000000..139f66a
--- /dev/null
+++ b/cblearn/datasets/tests/test_triplet_response.py
@@ -0,0 +1,33 @@
+import pytest
+import numpy as np
+
+from cblearn.datasets import triplet_response
+
+
+def test_triplet_response_validates_input():
+ n = 5 # n objects
+ t = 10 # n triplets
+ d = 2 # n dimensions
+ valid_queries = [
+ np.random.choice(n, size=3, replace=False)
+ for _ in range(t)
+ ]
+ invalid_queries_1 = [
+ np.random.choice(n, size=5, replace=False)
+ for _ in range(t)
+ ]
+ invalid_queries_2 = [
+ np.random.choice(n + 1, size=3, replace=False)
+ for _ in range(t)
+ ]
+ invalid_queries_3 = np.random.uniform(low=-1, high=1, size=(t, 3))
+ embedding = np.random.normal(size=(n, d))
+
+ responses = triplet_response(valid_queries, embedding)
+ assert responses.shape == (t, 3)
+ with pytest.raises(ValueError):
+ triplet_response(invalid_queries_1, embedding)
+ with pytest.raises(ValueError):
+ triplet_response(invalid_queries_2, embedding)
+ with pytest.raises(ValueError):
+ triplet_response(invalid_queries_3, embedding)
\ No newline at end of file
diff --git a/cblearn/embedding/_base.py b/cblearn/embedding/_base.py
index c1bf4e2..fc0f503 100644
--- a/cblearn/embedding/_base.py
+++ b/cblearn/embedding/_base.py
@@ -1,4 +1,5 @@
from typing import Optional
+import warnings
import numpy as np
from sklearn.base import TransformerMixin
@@ -15,10 +16,43 @@ def _more_tags(self):
'requires_positive_X': True,
'requires_positive_y': True,
'X_types': ['categorical'],
+ 'preserves_dtype': [], # .transform does not preserve dtype
+ 'binary_only': True, # enforce binary y in tests
+ 'triplets': True # enforce triplet X in tests
}
- def transform(self, X: Optional[utils.Query] = None, y: Optional[np.ndarray] = None) -> np.ndarray:
- check_is_fitted(self, 'embedding_')
+    def transform(self, X: Optional[utils.Query]) -> np.ndarray:
+ """ Transform the input data into the learned embedding.
+
+        The input data can be None or an array containing all or a subset of
+        the triplets passed to the .fit method.
+        The input data itself is not used by this method; the argument exists
+        for compatibility with the scikit-learn API.
+
+        Args:
+            X: Triplet answers, identical to the input of the .fit method, or None.
+        Returns:
+            The learned embedding.
+        Warns:
+            UserWarning: If X contains queries that were not passed to .fit.
+ """
+ check_is_fitted(self, ['embedding_', 'fit_X_'])
+
+ if X is not None:
+ # Check if the input is a valid query, required by sklearn estimator tests
+ X_order = utils.check_query(X, result_format='list-order')
+ not_isin = ~utils.isin_query(X_order, self.fit_X_)
+ if not_isin.any():
+ # X has to be allowed for the sklearn Pipeline API.
+ # https://github.com/scikit-learn/scikit-learn/blob/
+ # 19f41496868a98d8326a20e2a3039b2a4e24280e/sklearn/pipeline.py#L258
+ # https://github.com/scikit-learn/scikit-learn/blob/19f41496868a98d8326a20e2a3039b2a4e24280e/
+ # sklearn/pipeline.py#L1302C1-L1303C85
+ warnings.warn(UserWarning(
+ "Expects the same X queries in .fit and .transform (or None),"
+ f"got {X_order[not_isin]} not in fit(X).\n"
+ "Note: X can be passed for compatibility with the scikit-learn API."))
+
return self.embedding_
def predict(self, X: utils.Query, result_format: Optional[utils.Format] = None) -> np.ndarray:
@@ -37,6 +71,5 @@ def score(self, X: utils.Query, y: Optional[np.ndarray] = None) -> float:
Returns.
Fraction of correct triplets.
"""
- if y is None:
- y = X
- return metrics.query_accuracy(self.predict(X), y)
+ X, y = utils.check_query_response(X, y, result_format='list-count')
+ return metrics.query_accuracy(self.predict(X, result_format='list-count'), y)
diff --git a/cblearn/embedding/_ckl.py b/cblearn/embedding/_ckl.py
index b5464f2..aeee4b1 100644
--- a/cblearn/embedding/_ckl.py
+++ b/cblearn/embedding/_ckl.py
@@ -102,7 +102,9 @@ def fit(self, X: utils.Query, y: np.ndarray = None, init: np.ndarray = None,
Returns:
self.
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
triplets = utils.check_query_response(X, y, result_format='list-order')
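+        # a triplet query always has three index columns; sklearn's estimator
+        # checks read this attribute during validation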
+ self.n_features_in_ = 3
if not n_objects:
n_objects = triplets.max() + 1
random_state = check_random_state(self.random_state)
diff --git a/cblearn/embedding/_dims.py b/cblearn/embedding/_dims.py
index 05f6d1d..f7d56c1 100644
--- a/cblearn/embedding/_dims.py
+++ b/cblearn/embedding/_dims.py
@@ -10,13 +10,28 @@
@dataclass
class DimensionEstimationResult:
- estimated_dimension: int
+ """ Result object for dimensionality estimation of embeddings.
+
+ Attributes:
+ estimated_dimension: The estimated dimensionality
+ dimensions: The tested dimensions
+ train_scores: The training scores for each dimension
+ test_scores: The test scores for each dimension
+ stats_result: The result of the hypothesis test
+ """
+ estimated_dimension: int # The estimated dimensionality
dimensions: np.ndarray
train_scores: np.ndarray
test_scores: np.ndarray
stats_result: dict
def plot_scores(self, train_kwargs={}, test_kwargs={}):
+ """ Plot the train and test scores per dimesionality of the embedding.
+
+ Args:
+ train_kwargs: Keyword arguments for the training scores plot.
+ test_kwargs: Keyword arguments for the test scores plot.
+ """
import matplotlib.pyplot as plt
plot_validation_curve(self.dimensions, self.train_scores, self.test_scores,
@@ -123,8 +138,14 @@ def _sequential_crossval_ttest(test_scores_cv, n_splits, alpha):
def estimate_dimensionality_cv(estimator, queries, responses=None,
test_dimensions: list = [1, 2, 3], n_splits=10, n_repeats=1,
refit=True, alpha=0.05, param_name="n_components", n_jobs=-1, random_state=None):
- """ Estimates the dimensionality of the embedding space through cross-validation
- that has the best fit for the provided data [1]_.
+ """ Estimates the dimensionality of the embedding space.
+
+ The procedure estimates embeddings for the provided *test_dimensions*
+ and evaluates the fit (triplet accuracy) through cross-validation [1]_.
+    The estimated dimension is the lowest dimension whose fit is not significantly
+    worse than the best fit for the provided data. The test compares the increase in accuracy
+    between dimensions; if the increase is not significant, the lower dimension is considered sufficient.
+    Testing a larger range of dimensions can reduce the test's sensitivity due to multiple testing correction.
Attributes:
estimator: The embedding estimator to use.
diff --git a/cblearn/embedding/_forte.py b/cblearn/embedding/_forte.py
index c76d5c5..742eab8 100644
--- a/cblearn/embedding/_forte.py
+++ b/cblearn/embedding/_forte.py
@@ -87,7 +87,9 @@ def fit(self, X: utils.Query, y: np.ndarray = None, init: np.ndarray = None,
Returns:
self.
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
triplets = utils.check_query_response(X, y, result_format='list-order')
+ self.n_features_in_ = 3
if not n_objects:
n_objects = triplets.max() + 1
random_state = check_random_state(self.random_state)
diff --git a/cblearn/embedding/_gnmds.py b/cblearn/embedding/_gnmds.py
index fb1be9f..db05c34 100644
--- a/cblearn/embedding/_gnmds.py
+++ b/cblearn/embedding/_gnmds.py
@@ -107,7 +107,9 @@ def fit(self, X: utils.Query, y: np.ndarray = None, init: np.ndarray = None,
Returns:
self.
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
triplets = utils.check_query_response(X, y, result_format='list-order')
+ self.n_features_in_ = 3
if not n_objects:
n_objects = triplets.max() + 1
random_state = check_random_state(self.random_state)
diff --git a/cblearn/embedding/_mlds.py b/cblearn/embedding/_mlds.py
index a4caf07..5e568c6 100644
--- a/cblearn/embedding/_mlds.py
+++ b/cblearn/embedding/_mlds.py
@@ -82,11 +82,18 @@ def fit(self, X: utils.Query, y: np.ndarray = None) -> 'MLDS':
Returns:
This estimator
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
random_state = check_random_state(self.random_state)
- n_objects = X.max() + 1
triplets, answer = utils.check_query_response(X, y, result_format='list-boolean')
+ self.n_features_in_ = 3
+ n_objects = triplets.max() + 1
quads = triplets[:, [1, 0, 0, 2]]
+ flip = quads[:, [0, 1]].max(axis=1) > quads[:, [2, 3]].min(axis=1)
+ # make sure that we "standardize" the order of quadruplets to ensure
+ # that both True/False answers occur, which is required by the Logistic Regression
+ quads = np.where(np.c_[flip, flip, flip, flip], quads[:, [2, 3, 0, 1]], quads)
+ answer[flip] = ~answer[flip]
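+        # e.g. the quad (5, 7, 1, 3) satisfies max(5, 7) > min(1, 3), so it is
+        # reordered to (1, 3, 5, 7) and its answer is negated (illustrative values)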
if self.method.lower() == 'glm':
X01, rows = np.zeros((len(quads), n_objects)), np.arange(len(triplets))
X01[rows, quads[:, 0]] += 1
diff --git a/cblearn/embedding/_oenn.py b/cblearn/embedding/_oenn.py
index 3ddabe8..f215275 100644
--- a/cblearn/embedding/_oenn.py
+++ b/cblearn/embedding/_oenn.py
@@ -104,7 +104,10 @@ def fit(self, X: utils.Query, y: np.ndarray = None,
Returns:
self.
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
triplets = utils.check_query_response(X, y, result_format='list-order')
+ self.n_features_in_ = 3
+
random_state = check_random_state(self.random_state)
if n_objects is None:
n_objects = triplets.max() + 1
diff --git a/cblearn/embedding/_soe.py b/cblearn/embedding/_soe.py
index ed83111..d2c7f96 100644
--- a/cblearn/embedding/_soe.py
+++ b/cblearn/embedding/_soe.py
@@ -121,7 +121,9 @@ def fit(self, X: utils.Query, y: np.ndarray = None, init: np.ndarray = None,
Returns:
self.
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
queries = utils.check_query_response(X, y, result_format='list-order')
+ self.n_features_in_ = 3
if not n_objects:
n_objects = queries.max() + 1
random_state = check_random_state(self.random_state)
diff --git a/cblearn/embedding/_ste.py b/cblearn/embedding/_ste.py
index d47d2ef..e43fdab 100644
--- a/cblearn/embedding/_ste.py
+++ b/cblearn/embedding/_ste.py
@@ -110,7 +110,9 @@ def fit(self, X: utils.Query, y: np.ndarray = None, init: np.ndarray = None,
Returns:
self.
"""
+ self.fit_X_ = utils.check_query(X, result_format='list-order') # for data validation in .transform
triplets = utils.check_query_response(X, y, result_format='list-order')
+ self.n_features_in_ = 3
if not n_objects:
n_objects = triplets.max() + 1
random_state = check_random_state(self.random_state)
@@ -165,7 +167,7 @@ def _ste_x_grad(x, x_shape, triplets, heavy_tailed):
kernel = np.exp(-dist)
I, J, K = tuple(triplets.T)
- P = kernel[I, J] / (kernel[I, J] + kernel[I, K])
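+    # the small constant guards against division by zero when both
+    # kernel values underflow to zero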
+ P = kernel[I, J] / (kernel[I, J] + kernel[I, K] + 1e-12)
loss = -np.log(np.maximum(P, np.finfo(float).tiny)).sum()
if heavy_tailed:
diff --git a/cblearn/embedding/tests/test_sklearn_estimator_checks.py b/cblearn/embedding/tests/test_sklearn_estimator_checks.py
index fadfcaf..561ef64 100644
--- a/cblearn/embedding/tests/test_sklearn_estimator_checks.py
+++ b/cblearn/embedding/tests/test_sklearn_estimator_checks.py
@@ -12,11 +12,17 @@
While some of the checks could be adapted to our setting,
some cannot work with triplet input.
"""
-from contextlib import contextmanager
+import warnings
import pytest
import numpy as np
from sklearn.utils.estimator_checks import parametrize_with_checks
+import sklearn.utils.estimator_checks
+from sklearn.metrics.pairwise import linear_kernel, pairwise_distances
+from sklearn.utils._tags import (
+ _DEFAULT_TAGS,
+ _safe_tags,
+)
from cblearn.embedding import SOE, MLDS, STE, TSTE, CKL, GNMDS
from cblearn.embedding import wrapper
@@ -27,62 +33,114 @@
ALL_TRIPLET_EMBEDDING_ESTIMATORS = [SOE(), MLDS(), STE(), TSTE(), CKL(), GNMDS()]
-def _features_to_triplets(X):
- """ Guess if input are features and sample triplets then. """
- if isinstance(X, np.ndarray) and (
- not np.issubdtype(X.dtype, np.uint) or X.ndim != 2 or X.shape[1] != 3):
- n = X.size
- new_X = make_random_triplets(X, size=n, result_format='list-order', random_state=1)
- print("to triplets", X.shape, X.dtype, new_X.shape, new_X.dtype)
- return new_X
- else:
- print("are triplets", np.asarray(X).shape)
- return X
+##########
+# Monkey patching to transform featurized input to triplets
+orig_enforce_estimator_tags_X = sklearn.utils.estimator_checks._enforce_estimator_tags_X
+orig_enforce_estimator_tags_y = sklearn.utils.estimator_checks._enforce_estimator_tags_y
-@contextmanager
-def wrap_triplet_estimator(estimator):
- """ Replace feature input estimator input by triplets in context.
- Wraps fit and predict methods on context enter.
- After context exit, original methods are restored.
- """
- orig_fit = estimator.__class__.fit
- orig_predict = estimator.__class__.predict
+def _enforce_estimator_tags_X(estimator, X, kernel=linear_kernel):
+ X = orig_enforce_estimator_tags_X(estimator, X, kernel)
+ if _safe_tags(estimator, key="triplets"):
+ n = X.shape[0]
+ if len(X) == 1: # make_random_triplets expects at least 3 objects
+ X = np.r_[X, X, X]
+ X = make_random_triplets(X, size=n, result_format='list-order')
+ return X
+
+
+def _enforce_estimator_tags_y(estimator, y):
+ y = orig_enforce_estimator_tags_y(estimator, y)
+ if _safe_tags(estimator, key="triplets"):
+ n = y.shape[0]
+ y = np.ones(n)
+ return y
- estimator.__class__.fit = lambda self, X, y=None: orig_fit(self, _features_to_triplets(X))
- estimator.__class__.predict = lambda self, X: orig_predict(self, _features_to_triplets(X))
- yield estimator
+sklearn.utils.estimator_checks._enforce_estimator_tags_X = _enforce_estimator_tags_X
+sklearn.utils.estimator_checks._enforce_estimator_tags_y = _enforce_estimator_tags_y
+###########
- # Context exit
- estimator.__class__.fit = orig_fit
- estimator.__class__.predict = orig_predict
+def test_enforce_estimator_tags_monkeypatch():
+ X = np.random.rand(10, 5)
+ y = np.random.rand(10, 1)
+ estimator = ALL_TRIPLET_EMBEDDING_ESTIMATORS[0]
+ assert _safe_tags(estimator).get('triplets', False)
+ new_X = sklearn.utils.estimator_checks._enforce_estimator_tags_X(estimator, X)
+ new_y = sklearn.utils.estimator_checks._enforce_estimator_tags_y(estimator, y)
-SKIP_CHECKS = [
- 'check_estimators_nan_inf',
- 'check_estimator_sparse_data',
- 'check_estimators_pickle',
- 'check_pipeline_consistency',
+ assert new_X.shape[1] == 3
+ assert new_y.shape[0] == new_X.shape[0]
+ assert new_X.shape[0] >= X.shape[0]
+ assert np.isin(np.unique(new_X), np.arange(10)).all()
+ np.testing.assert_equal(np.unique(new_y), [1])
+
+
+# These tests require a 1-to-1 relationship for X -> .transform(X).
+# This will never be true for our estimators (n-to-m).
+# The alternative to skipping them here would be the 'non_deterministic' tag.
+# This tag, however, would skip more tests than necessary.
+SKIP_FOR_TRIPLETS = [
'check_methods_subset_invariance',
- 'check_transformer_general',
- 'check_transformer_data_not_an_array',
- 'check_n_features_in',
- 'check_fit2d_1sample',
- 'check_fit2d_predict1d',
- 'check_fit_score_takes_y',
- 'check_estimators_empty_data_messages',
- 'check_methods_sample_order_invariance',
+ 'check_methods_sample_order_invariance'
]
-
@pytest.mark.sklearn
+@pytest.mark.filterwarnings("ignore:Expects the same X queries") # Expected in check_fit_idem
@parametrize_with_checks(
ALL_TRIPLET_EMBEDDING_ESTIMATORS
)
def test_all_estimators(estimator, check):
- if check.func.__name__ in SKIP_CHECKS:
- pytest.skip("cblearn ordinal embedding estimator's are not fully compatible to sklearn estimators.")
+ tags = _safe_tags(estimator)
+ if not (tags.get('triplets') and check.func.__name__ in SKIP_FOR_TRIPLETS):
+ check(estimator)
+
+
+@pytest.mark.parametrize(
+ "estimator",
+ ALL_TRIPLET_EMBEDDING_ESTIMATORS
+)
+def test_transform_warns_with_other_X(estimator):
+ """ Test if warnings are raised when using different X instances in fit and transform. """
+ X = np.random.rand(10, 3)
+ X = _enforce_estimator_tags_X(estimator, X)
+ estimator.fit(X)
+ with warnings.catch_warnings(record=True) as w:
+ estimator.transform(X)
+ estimator.transform(None)
+ assert len(w) == 0, "Expected no warnings"
+
+ other_X = X + 1
+ with pytest.warns(UserWarning, match="Expects the same X queries in .fit and .transform"):
+ estimator.transform(other_X)
+
+ X += 1
+ with pytest.warns(UserWarning, match="Expects the same X queries in .fit and .transform"):
+ estimator.transform(X)
+
+
+@pytest.mark.parametrize(
+ 'estimator',
+ ALL_TRIPLET_EMBEDDING_ESTIMATORS
+)
+def test_make_pipeline(estimator):
+ """ Assure that a pipeline can be constructed with ordinal embedding estimators
+ and that the resulting pipeline behaves as running the steps individually.
+ """
+ from sklearn.pipeline import make_pipeline
+ from sklearn.cluster import KMeans
+ from sklearn.datasets import make_blobs
+
+ X_feat, y_clust = make_blobs(n_samples=12, n_features=1, centers=3)
+ X_trip, y_trip = make_random_triplets(X_feat, size=100, result_format='list-count')
+ kmeans = KMeans(3, random_state=42)
+ estimator.random_state = 42
+ pipe = make_pipeline(estimator, kmeans)
+
+ y_pred_clust_pipe = pipe.fit_predict(X_trip, y_trip)
+ X_emb = estimator.fit_transform(X_trip, y_trip)
+ y_pred_clust = kmeans.fit_predict(X_emb)
- with wrap_triplet_estimator(estimator) as wrapped_estimator:
- check(wrapped_estimator)
+ np.testing.assert_array_equal(y_pred_clust_pipe, y_pred_clust)
diff --git a/cblearn/metrics/__init__.py b/cblearn/metrics/__init__.py
index 33419e4..2a2b4a2 100644
--- a/cblearn/metrics/__init__.py
+++ b/cblearn/metrics/__init__.py
@@ -1,4 +1,4 @@
from ._triplets import query_error
from ._triplets import query_accuracy
-from ._triplets import QueryScorer
+from ._triplets import query_accuracy_scorer
from ._procrustes import procrustes_distance
diff --git a/cblearn/metrics/_triplets.py b/cblearn/metrics/_triplets.py
index 83a68ce..1d69dbc 100644
--- a/cblearn/metrics/_triplets.py
+++ b/cblearn/metrics/_triplets.py
@@ -58,12 +58,17 @@ def query_accuracy(true_response: utils.Response, pred_response: utils.Response)
def query_error(true_response: utils.Response, pred_response: utils.Response) -> float:
- return 1 - query_accuracy(true_response, pred_response)
+ """ Error measured by 1 - query accuracy.`
+ See :py:func:`cblearn.metrics.query_accuracy` for more information."""
+ return 1 - query_accuracy(true_response, pred_response)
-def _scorer(true_response, query):
- query, pred_response = utils.check_query_response(query, result_format='list-boolean')
- return query_accuracy(true_response, pred_response)
+def query_accuracy_scorer(clf, X, y):
+ """Scorer function for query accuracy, compatible with sklearn's scorer API.
-QueryScorer = metrics.make_scorer(_scorer)
+ See :py:func:`cblearn.metrics.query_accuracy` for more information.
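+
+    A usage sketch (the estimator and data names are illustrative):
+
+    >>> from sklearn.model_selection import cross_val_score  # doctest: +SKIP
+    >>> cross_val_score(SOE(n_components=2), triplets, responses,
+    ...                 scoring=query_accuracy_scorer)  # doctest: +SKIP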
+ """
+ X, y = utils.check_query_response(X, y, result_format='list-count')
+ y_pred = clf.predict(X, result_format='list-count')
+ return query_accuracy(y, y_pred)
\ No newline at end of file
diff --git a/cblearn/metrics/tests/test_triplets.py b/cblearn/metrics/tests/test_triplets.py
index 7a8d627..8e57174 100644
--- a/cblearn/metrics/tests/test_triplets.py
+++ b/cblearn/metrics/tests/test_triplets.py
@@ -12,8 +12,8 @@ def __init__(self, embedding):
def transform(self, *args, **kwargs):
return self.embedding
- def predict(self, triplets):
- result = datasets.triplet_response(triplets, self.embedding)
+ def predict(self, triplets, result_format=None):
+ result = datasets.triplet_response(triplets, self.embedding, result_format=result_format)
if isinstance(result, tuple):
return result[1]
else:
@@ -55,5 +55,5 @@ def test_triplet_scorer():
triplets, answers = datasets.triplet_response(triplets, embedding, result_format='list-boolean')
estimator = DummyOrdinalEmbedding(embedding)
- assert metrics.QueryScorer(estimator, triplets, answers) == 1
- assert metrics.QueryScorer(estimator, triplets, ~answers) == 0
+ assert metrics.query_accuracy_scorer(estimator, triplets, answers) == 1
+ assert metrics.query_accuracy_scorer(estimator, triplets, ~answers) == 0
diff --git a/cblearn/preprocessing/_label.py b/cblearn/preprocessing/_label.py
index 9bbed6e..178b4ae 100644
--- a/cblearn/preprocessing/_label.py
+++ b/cblearn/preprocessing/_label.py
@@ -119,12 +119,21 @@ def query_from_columns(data: Union[np.ndarray, "pandas.DataFrame"], # noqa: F82
return_transformer: bool = False) \
-> Union[Tuple[np.ndarray, np.ndarray],
Tuple[Tuple[np.ndarray, np.ndarray], Tuple[TransformerMixin, TransformerMixin]]]:
- """ Extract queries from objects in columns or dataframes.
-
- The objects in the column data might be defined by a single or multiple numerical attributes.
- Then this function assigns to each object an index and returns query and response based on object indices,
- as required by most library functions.
- If attributes are non-numeric, consider first encoding them with :class:`sklearn.preprocessing.LabelEncoder`.
+ """ Extract queries with indices from feature columns in a DataFrame.
+
+    Comparison-based data in this library is typically represented by a collection
+    of unique object indices. For example, [[1, 0, 2], [0, 2, 3]] could encode two triplet
+    comparisons between objects 0, 1, 2, and 3.
+    Experimental data, however, often stores the objects as featurized columns in a dataframe, describing the
+    presented stimuli.
+    There, the same comparisons could be represented by two rows with columns
+    `alpha1`, `tau1`, `alpha2`, `tau2`, `alpha3`, `tau3` and `Response`.
+    The `query_from_columns` function extracts the comparison queries
+    from such a dataframe by identifying the unique objects (e.g. the unique combinations of `alpha` and `tau` here).
+
+ .. note::
+ If the dataframe already contains unique indices for the objects per query,
+ consider accessing the indices directly, e.g. `df[['anchor_ix', 'pos_ix', 'neg_ix']].values.astype(int), df['response'].values.astype(bool)`.
>>> import pandas as pd
>>> frame = pd.DataFrame({'alpha1': [0.1, 0.7, 0.1], 'tau1': [0, 0, 1],
@@ -140,8 +149,15 @@ def query_from_columns(data: Union[np.ndarray, "pandas.DataFrame"], # noqa: F82
... response_columns='Response', response_map={1: True, 0: False})
>>> q.tolist(), r.tolist()
([[0, 3, 4], [4, 2, 3], [1, 4, 4]], [True, False, False])
+ >>> q, r = query_from_columns(frame, [('alpha1', 'tau1'), ('alpha2', 'tau2'), ('alpha3', 'tau3')],
+ ... response_columns='Response')
+ >>> q.tolist(), r.tolist()
+ ([[0, 3, 4], [4, 2, 3], [1, 4, 4]], [1, 0, 0])
- The transformers can be used to get object attributes from the object index.
+ The indices can be used to get the object attributes from the dataframe.
+    This might be helpful for visualizations and debugging.
+ In the following example, the object-feature to object-index transformer object
+ is accessed to get the object attributes from the object index.
>>> (q,r), (q_transform, r_transform) = query_from_columns(
... np.array(frame), [0, 2, 4], -1, {1: True, 0: False}, return_transformer=True)
@@ -154,7 +170,7 @@ def query_from_columns(data: Union[np.ndarray, "pandas.DataFrame"], # noqa: F82
query_columns: Indices or column-labels in data per query entry.
Columns can be grouped as tuples, if multiple columns define an object.
response_columns: Indices or column-labels in data per response entry.
- response_map: Dictionary mapping the response entries in data to {-1, 1} or {False, True}.
+        response_map: Dictionary mapping the response entries in data to {-1, 1} or {False, True}. If None, the original response values are kept.
return_transformer: If true, transformer objects for the query and response are returned.
Returns:
Tuple with arrays for the queries and responses.
@@ -172,9 +188,17 @@ def query_from_columns(data: Union[np.ndarray, "pandas.DataFrame"], # noqa: F82
query = query_enc.fit_transform(query_data)
if response_columns:
- inverse_map = {v: k for k, v in response_map.items()}
- response_enc = FunctionTransformer(func=np.vectorize(response_map.get),
- inverse_func=np.vectorize(inverse_map.get), check_inverse=False)
+ if response_map is None:
+ response_enc = FunctionTransformer(
+ func=lambda x: x,
+ inverse_func=lambda x: x,
+ check_inverse=False)
+ else:
+ inverse_map = {v: k for k, v in response_map.items()}
+ response_enc = FunctionTransformer(
+ func=np.vectorize(response_map.get),
+ inverse_func=np.vectorize(inverse_map.get),
+ check_inverse=False)
response = response_enc.fit_transform(data[response_columns])
if return_transformer:
return (query, response), (query_enc, response_enc)
diff --git a/cblearn/utils/__init__.py b/cblearn/utils/__init__.py
index e5205a4..a113dfa 100644
--- a/cblearn/utils/__init__.py
+++ b/cblearn/utils/__init__.py
@@ -1,4 +1,5 @@
from ._validate_data import check_query, check_response, check_query_response
+from ._validate_data import isin_query
from ._validate_size import check_size
from ._data_format import data_format, check_format
from ._data_format import QueryFormat, ResponseFormat, Format
diff --git a/cblearn/utils/_data_format.py b/cblearn/utils/_data_format.py
index 06edc8e..eed3024 100644
--- a/cblearn/utils/_data_format.py
+++ b/cblearn/utils/_data_format.py
@@ -64,6 +64,7 @@ def data_format(query: Union[Query], response: Optional[np.ndarray] = None
Raises:
TypeError: Invalid type of data.
"""
+ query_format = None
if isinstance(query, (scipy.sparse.spmatrix, sparse.SparseArray)):
query_format = QueryFormat.TENSOR
elif isinstance(query, (Sequence, np.ndarray)):
@@ -71,6 +72,15 @@ def data_format(query: Union[Query], response: Optional[np.ndarray] = None
elif query is None:
query_format = None
else:
+ try:
+ # a last resort: can numpy read the object?
+ arr = np.asarray(query)
+ if arr.ndim == 2:
+ query_format = QueryFormat.LIST
+ except ValueError:
+ pass
+
+ if query_format is None:
raise ValueError(f"Expects query as sequence, array, or sparse array; got {query}")
if response is None:
@@ -80,14 +90,18 @@ def data_format(query: Union[Query], response: Optional[np.ndarray] = None
return query_format, ResponseFormat.ORDER
else:
return query_format, None
- elif isinstance(response, (Sequence, np.ndarray)):
- response_dtype = np.asarray(response).dtype
else:
- return query_format, None
+ try:
+ resp_arr = np.asarray(response)
+ except ValueError:
+ raise ValueError(f"Expects response as None or array-like, got {response}")
+ response_dtype = resp_arr.dtype
+ if np.isin(resp_arr, [0, 1]).all():
+ response_dtype = bool
if response_dtype == bool:
return query_format, ResponseFormat.BOOLEAN
elif np.issubdtype(response_dtype, np.number):
return query_format, ResponseFormat.COUNT
else:
- raise ValueError(f"Expects response dtype bool or numeric, got {response_dtype}")
\ No newline at end of file
+ raise ValueError(f"Unknown label type: Expects response as bool or numeric, got {response_dtype}")
\ No newline at end of file
diff --git a/cblearn/utils/_validate_data.py b/cblearn/utils/_validate_data.py
index 083cd68..cff37a7 100644
--- a/cblearn/utils/_validate_data.py
+++ b/cblearn/utils/_validate_data.py
@@ -10,11 +10,33 @@
from ._typing import Query
-def _check_list_query_response(query, response):
+def isin_query(queries: np.ndarray, test_queries: np.ndarray) -> np.ndarray:
+ """ Calculates queries in test_queries (row-wise).
+
+ Returns a boolean array of the same shape as queries that is True where an query of queries is in test_queries and False otherwise.
+
+ Args:
+ queries: Input array
+ test_queries: The query array to test against.
+ Returns:
+ isin: same length as queries.
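+
+    A doctest sketch (values are illustrative):
+
+    >>> isin_query(np.array([[1, 2, 3], [9, 9, 9]]), np.array([[1, 2, 3]]))
+    array([ True, False])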
+ """
+ queries = check_array(queries)
+ test_queries = check_array(test_queries)
+ if queries.shape[1] != test_queries.shape[1]:
+ raise ValueError(f"Expects equal number of columns, got {queries.shape[1]} != {test_queries.shape[1]}")
+ dtype = [(f'f{i}', int) for i in range(queries.shape[1])]
+ test_queries_struct = np.core.records.fromarrays(test_queries.T, dtype=dtype)
+ queries_struct = np.core.records.fromarrays(queries.T, dtype=dtype)
+ is_in = np.isin(queries_struct, test_queries_struct)
+ return is_in
+
+
+def _check_list_query_response(query, response, **kwargs):
if response is None:
- return check_array(query, dtype=np.uint32), None
+ return check_array(query, dtype=np.uint32, **kwargs), None
else:
- return check_X_y(query, response, dtype=np.uint32)
+ return check_X_y(query, response, dtype=np.uint32, **kwargs)
def _unroll_responses(query: Optional[np.ndarray], response: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
@@ -60,7 +82,7 @@ def check_bool_list_query_response(query, response, standard: bool = True):
__, input_response_format = data_format(query, response)
if input_response_format is ResponseFormat.BOOLEAN:
- bool_response = response
+ bool_response = response.astype(bool)
elif input_response_format is ResponseFormat.COUNT:
if np.any(response == 0):
raise ValueError("Undecided responses (0) cannot be represented as order or bool.")
@@ -71,6 +93,7 @@ def check_bool_list_query_response(query, response, standard: bool = True):
if standard:
query, mask = _standardize_list_query(query)
+ bool_response = np.array(bool_response, copy=True)
bool_response[mask] = ~bool_response[mask]
return query, bool_response
@@ -78,22 +101,22 @@ def check_bool_list_query_response(query, response, standard: bool = True):
def check_count_list_query_response(query, response, standard: bool = True):
__, input_response_format = data_format(query, response)
- query, response = _check_list_query_response(query, response)
+ query, response = _check_list_query_response(query, response, copy=True)
if input_response_format is ResponseFormat.COUNT:
query, count_response = _unroll_responses(query, response)
if input_response_format is ResponseFormat.BOOLEAN:
count_response = response.astype(int) * 2 - 1
elif input_response_format is ResponseFormat.ORDER:
count_response = np.full((query.shape[0],), 1)
-
if standard:
query, mask = _standardize_list_query(query)
+ count_response = np.array(count_response, copy=True)
count_response[mask] *= -1
return query, count_response
def check_order_list_query_response(query, response):
- query, response = _check_list_query_response(query, response)
+ query, response = _check_list_query_response(query, response, copy=True)
__, input_response_format = data_format(query, response)
if input_response_format is ResponseFormat.COUNT:
@@ -157,13 +180,16 @@ def check_tensor_query_response(query: Union[sparse.COO, scipy.sparse.spmatrix],
if isinstance(query, scipy.sparse.spmatrix):
n_objects = query.shape[0]
- n_dim = int(np.ceil(np.log(np.product(query.shape)) / np.log(n_objects)))
+ n_dim = int(np.ceil(np.log(np.prod(query.shape)) / np.log(n_objects)))
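+        # e.g. a (100, 10000) scipy matrix over 100 objects unrolls to
+        # n_dim = ceil(log(1e6) / log(100)) = 3, i.e. a (100, 100, 100) tensor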
query = sparse.COO.from_scipy_sparse(query)
else:
n_objects = max(query.shape)
n_dim = len(query.shape)
expected_shape = n_dim * (n_objects,)
+ if query.size != np.prod(expected_shape):
+ raise ValueError(f"Expects sparse matrix reshapeable to {expected_shape}, "
+ f"got {query.shape}.")
if len(query.shape) != n_dim or np.any(np.not_equal(query.shape, expected_shape)):
query = query.reshape(expected_shape)
@@ -197,7 +223,7 @@ def check_response(response: np.ndarray, result_format: Optional[Format] = None)
if result_format[0] is not QueryFormat.LIST or result_format[1] is ResponseFormat.ORDER:
raise ValueError(f"Expects result format list-boolean or list-count, got {result_format}.")
- dummy_query = np.empty_like(response).reshape(-1, 1)
+ dummy_query = np.zeros_like(response).reshape(-1, 1)
return check_list_query_response(dummy_query, response, standard=False, result_format=(result_format))[1]
@@ -277,7 +303,6 @@ def check_query_response(query: Union[Query], response: Optional[np.ndarray] = N
"""
input_query_format, input_response_format = data_format(query, response)
output_query_format, output_response_format = check_format(result_format, query, response)
-
if output_query_format is QueryFormat.TENSOR:
if input_query_format is QueryFormat.LIST:
query, response = check_list_query_response(query, response, (QueryFormat.LIST, output_response_format),
diff --git a/cblearn/utils/tests/test_validate_data.py b/cblearn/utils/tests/test_validate_data.py
index 4417355..c0f962a 100644
--- a/cblearn/utils/tests/test_validate_data.py
+++ b/cblearn/utils/tests/test_validate_data.py
@@ -123,12 +123,19 @@ def test_check_query_response_FORMAT(input, response_format, test_output):
""" Test all possible conversations of answer types. """
if isinstance(input, tuple):
triplets, answers = input
+ orig_triplets = np.array(triplets, copy=True)
+ orig_answers = np.array(answers, copy=True)
else:
triplets, answers = input, None
+ orig_triplets = np.array(triplets, copy=True)
triplet_answers = utils._validate_data.check_list_query_response(triplets, answers, standard=True,
result_format=response_format)
np.testing.assert_equal(triplet_answers, test_output)
+ np.testing.assert_equal(triplets, orig_triplets, err_msg="Input data was modified.")
+ if answers is not None:
+ np.testing.assert_equal(answers, orig_answers, err_msg="Input data was modified.")
+
def test_check_query_response_UNDECIDED():
with pytest.raises(ValueError):
@@ -143,3 +150,11 @@ def test_check_query_response_UNDECIDED():
print(answers)
np.testing.assert_equal(triplets, triplets_numeric_undecided)
np.testing.assert_equal(answers, answers_numeric_undecided)
+
+
+def test_isin_query():
+ fit_X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+ new_X = np.array([[4, 5, 6], [1, 2, 3], [2, 2, 3], [4, 5, 6]])
+
+ isin = utils.isin_query(new_X, fit_X)
+ np.testing.assert_array_equal(isin, [True, True, False, True])
\ No newline at end of file
diff --git a/docs/car_embedding.jpg b/docs/car_embedding.jpg
new file mode 100644
index 0000000..c40713c
Binary files /dev/null and b/docs/car_embedding.jpg differ
diff --git a/docs/comparison_tasks.png b/docs/comparison_tasks.png
new file mode 100644
index 0000000..fff92e1
Binary files /dev/null and b/docs/comparison_tasks.png differ
diff --git a/docs/comparison_tasks.svg b/docs/comparison_tasks.svg
new file mode 100644
index 0000000..c2d2499
--- /dev/null
+++ b/docs/comparison_tasks.svg
@@ -0,0 +1,271 @@
+<!-- SVG markup (271 lines) omitted -->
diff --git a/docs/conf.py b/docs/conf.py
index 1af713d..490bc7f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,8 +18,14 @@
# -- Project information -----------------------------------------------------
project = 'cblearn'
-author = 'David-Elias Künstle & Ulrike von Luxburg'
-copyright = f'2023, {author}'
+author = 'David-Elias Künstle, Ulrike von Luxburg, & Contributors'
+copyright = f'2024, {author}'
+html_theme_options = {
+ "logo": {
+ "image_light": "logo-light.png",
+ "image_dark": "logo-dark.png",
+ }
+}
# -- General configuration ---------------------------------------------------
@@ -34,7 +40,7 @@
'sphinx.ext.viewcode',
'sphinx.ext.mathjax',
'sphinx.ext.napoleon',
- 'sphinx_rtd_theme',
+ 'pydata_sphinx_theme',
'sphinx_gallery.gen_gallery',
]
@@ -78,7 +84,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'sphinx_rtd_theme'
+html_theme = 'pydata_sphinx_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
diff --git a/docs/contributor_guide/index.rst b/docs/contributor_guide/index.rst
index 9fdd2bd..5e8ca6b 100644
--- a/docs/contributor_guide/index.rst
+++ b/docs/contributor_guide/index.rst
@@ -1,3 +1,5 @@
+.. _contributor_guide:
+
=================
Contributor Guide
=================
@@ -9,11 +11,42 @@ This guide describes how to contribute code or documentation.
.. _Github issues: https://github.com/dekuenstle/cblearn/issues
----------------
-Getting Started
----------------
+.. _developer_install:
+
+------------
+Installation
+------------
+
+Contributors should not install the package from PyPI but from the Github repository
+to get the latest version and to be able to manipulate the code.
+First download the repository and install the project in developer mode with
+developer dependencies.
+
+.. code-block:: bash
+
+   $ git clone git@github.com:cblearn/cblearn.git
+ $ cd cblearn
+   $ pip install -e .[tests,docs]
+
+The ``-e`` option installs the package in developer mode such that changes in the code are considered directly without re-installation.
+
+tests
+ To run the unit tests, the ``pytest`` package is required, which
+ can be installed by adding the ``tests`` option to the install command.
+
+docs
+    Building these docs requires the ``sphinx`` package, which can be installed by adding the ``docs`` option to the install command.
+
+
+Now you can run the tests and build the documentation:
+
+.. code-block:: bash
+
+ $ python -m pytest --remote-data # should run all tests; this can take a while.
+
+ $ cd docs
+ $ make html # should generate docs/_build/html/index.html
-We assume you downloaded and installed ``cblearn`` as described in :ref:`developer_install`.
The project directory contains the code directory ``cblearn/`` and the documentation ``docs/``.
In addition, the folder contains a readme, license, multiple configuration files, and an examples folder.
@@ -52,13 +85,21 @@ These tests are skipped by default but can be run by adding the ``--remote-data`
Scikit-learn estimator tests
----------------------------
``scikit-learn`` provides a test suite that should ensure the compatibility of estimators.
-We use this test suite to test our estimators, too, but have to skip some tests because they use artificial data incompatible
-to comparison data. Typically, ``cblearn`` estimators are compatible with ``scikit-learn`` estimators
-if comparisons are represented as ``numpy`` arrays. From an API perspective,
-comparison arrays look like discrete features and class labels; however, not all discrete features and class labels are valid comparisons.
+The estimator classes that require triplet data should return
+``'triplets': True`` from the ``_get_tags`` method (see the sketch below).
+Based on this tag, our test suite extends the sklearn estimator tests to handle comparison-based estimators.
+This modification is not unusual; sklearn internally modifies the data and skips individual tests silently based on different tags (e.g. *pairwise*).
+
+The modifications are:
+
+ - Monkey-patching of ``check_estimator`` function to create triplets instead of featurized data.
+ - Skipping ``check_methods_subset_invariance`` and ``check_methods_sample_order_invariance``
+
+ These tests require a 1-to-1 relationship for X -> .transform(X).
+ This will never be true for our estimators (n-to-m).
+ The alternative to skipping them here would be the 'non_deterministic' tag,
+ which would trigger sklearn to skip these but also additional tests.
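+
+A minimal sketch of such a tag override (the class and base names are
+illustrative; only the tag entry matters):
+
+.. code-block:: python
+
+    from sklearn.base import BaseEstimator
+
+    class MyTripletEstimator(BaseEstimator):
+        def _more_tags(self):
+            return {'triplets': True}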
-In the future scikit-learn might simplify the usage of custom data generation routines during the compatibility tests.
-Otherwise, we might replace those incompatible tests with our own.
All sklearn estimator tests can be skipped with ``pytest -m "not sklearn"``.
diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst
new file mode 100644
index 0000000..d537e98
--- /dev/null
+++ b/docs/getting_started/index.rst
@@ -0,0 +1,104 @@
+.. _getting_started:
+
+================
+Getting Started
+================
+
+-----
+Setup
+-----
+
+``cblearn`` requires Python 3.9 or newer.
+The package is mainly tested on Linux, but Windows and Mac OS should work, too.
+
+Python environment
+==================
+The easiest way to install Python and its dependencies is using an
+Anaconda_ environment or similar, because dependencies then do not conflict with
+other Python packages you may have installed and the Python version can be specified.
+
+.. _Anaconda: https://docs.anaconda.com/anaconda/install/
+
+.. code-block:: bash
+
+ conda create -n cblearn python==3.9
+ conda activate cblearn
+
+
+Install cblearn
+===================
+
+``cblearn`` can be installed using ``pip``:
+
+.. code-block:: bash
+
+ pip install cblearn
+
+
+This will install the minimal set of required packages, sufficient for most use cases while saving disk space.
+However, some features require additional packages that can be installed by adding an ``option`` to the install command.
+
+.. _extras_install:
+
+Install Extra Requirements
+==========================
+
+Extra requirements enable more advanced features and can be installed by adding an ``option`` to the install command.
+Some of those extra dependencies need non-Python packages to be installed first.
+
+.. code-block:: bash
+
+ $ pip install cblearn[torch,wrapper] h5py
+
+
+torch
+ Most estimators provide an (optional) implementation using ``pytorch`` to run large datasets on CPU and GPU.
+    This requires the ``pytorch`` package to be installed manually
+ or by adding the ``torch`` extras option to the install command.
+ Note that ``pytorch`` might need about 1GB of disk space.
+
+wrapper
+    The estimators in :ref:`references_embedding_wrapper` provide a Python interface to the original implementations
+    in ``R``.
+    This requires the ``rpy2`` package to be installed by adding the ``wrapper`` option to the install command.
+    Additionally, an ``R`` interpreter must be installed and available in the ``PATH`` environment variable.
+ The ``R`` packages are installed automatically upon the first use of the estimators.
+
+h5py
+ The function :func:`cblearn.datasets.fetch_imagenet_similarity` requires the ``h5py`` package to load the dataset.
+ This package can be installed with pip.
+ Note that some platforms additionally require the ``hdf5`` libraries to be installed manually.
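+
+ For instance, once ``h5py`` is installed, the dataset can be fetched directly
+ (a minimal sketch; the first call downloads the data, which may take a while):
+
+ .. code-block:: python
+
+     from cblearn.datasets import fetch_imagenet_similarity
+
+     # downloads and caches the dataset on first use
+     data = fetch_imagenet_similarity()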
+
+
+-----------
+Quick Start
+-----------
+
+`cblearn` is designed to be easy to use.
+The following example generates triplets from a point cloud,
+each specifying whether point A is closer to point B or to point C, and fits an ordinal embedding
+model to these triplets.
+The estimated embedding is then evaluated by how well it reconstructs the relative distances between the points.
+
+.. literalinclude:: quickstart.py
+ :language: python
+ :linenos:
+
+The output should show a trend similar to the following::
+
+ Triplets | Error (SSE)
+ ----------------------
+ 25 | 0.913
+ 100 | 0.278
+ 400 | 0.053
+ 1600 | 0.001
+
+The Procrustes distance measures the sum of squared errors between the points and the embedding
+after aligning the embedding to the points (i.e., by optimizing rotation, translation, and scaling).
+The error approaches zero, demonstrating that the relative distances in the point cloud can be reconstructed from triplets alone,
+once enough of them are available.
+
+The triplet generator's `result_format` option specifies the expected data format of the triplets, as triplets can be represented in different ways.
+This example uses the `list-order` format, a list of triplets containing the indices of an anchor, a near, and a far point.
+Learn more about data formats and other aspects of the library in the :ref:`user_guide`.
+Alternatively, you can find more code in the :ref:`examples` or get an overview of the :ref:`api`.
diff --git a/docs/getting_started/quickstart.py b/docs/getting_started/quickstart.py
new file mode 100644
index 0000000..2f10d31
--- /dev/null
+++ b/docs/getting_started/quickstart.py
@@ -0,0 +1,14 @@
+import numpy as np
+from cblearn.datasets import make_random_triplets
+from cblearn.embedding import SOE
+from cblearn.metrics import procrustes_distance
+
+points = np.random.rand(20, 2)  # ground truth: 20 points in 2 dimensions
+estimator = SOE(n_components=2)  # soft ordinal embedding in 2 dimensions
+
+print(f"Triplets | Error (SSE)\n{22 * '-'}")
+for n in (25, 100, 400, 1600):
+    # sample n random triplets from the ground-truth points
+    triplets = make_random_triplets(points, size=n, result_format="list-order")
+    # estimate a 2d embedding from the triplets alone
+    embedding = estimator.fit_transform(triplets)
+    # compare embedding and ground truth after aligning them
+    error = procrustes_distance(points, embedding)
+    print(f" {len(triplets):4d} | {error:.3f}")
diff --git a/docs/icon.png b/docs/icon.png
new file mode 100644
index 0000000..0492d55
Binary files /dev/null and b/docs/icon.png differ
diff --git a/docs/icon.svg b/docs/icon.svg
new file mode 100644
index 0000000..0dd5fea
--- /dev/null
+++ b/docs/icon.svg
@@ -0,0 +1,178 @@
+
+
+
+
diff --git a/docs/index.rst b/docs/index.rst
index 8458f0e..e0b71f1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,14 +3,57 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
-Welcome to cblearn's documentation!
-===================================
+cblearn documentation
+=====================
+
+`cblearn` is a Python package for comparison-based machine learning,
+a type of machine learning where the only available information
+is ordinal comparisons of datapoint similarities.
+These comparisons often come from human experiments in the form of triplets (A, B, C),
+where A is more similar to B than to C, or from related tasks such as the odd-one-out.
+
+.. figure:: comparison_tasks.png
+ :align: center
+ :class: with-border
+
+ Examples of similarity comparison tasks that can be processed with cblearn.
+ The most common task is the triplet comparison, where the goal is to find the most similar image (blue) to the reference image (red).
+ The show-8-rank-2 task is a variant of the triplet comparison where the goal is to find the first and second most similar image to the reference image.
+ The odd-one-out task is a task where the goal is to find the image that is most dissimilar to the other images.
+ Both the ranking and odd-one-out tasks can be converted to triplet comparisons and
+ analyzed with the same algorithms.
+
+**cblearn provides a set of tools to read, convert, and manipulate comparison-based datasets**.
+It also provides a set of comparison-based models, including ordinal embedding and clustering,
+that can be used as part of a scikit-learn pipeline.
+As such, the package is used by researchers in machine learning to evaluate and
+develop new learning algorithms and by researchers in cognitive science to
+model perceived similarity in humans.
+
+.. figure:: car_embedding.jpg
+ :align: center
+ :width: 75%
+ :class: with-border
+
+ This 2-D embedding was generated from human selections of the "most representative" car image out of three presented (see :ref:`central_car_dataset`).
+ The distance in the embedding space represents the perceived dissimilarity between the images.
+ It appears that cars are grouped by their type in the embedding space.
+
+New users should start in the :ref:`getting_started` section.
+Bugs and feature requests are welcome on the `GitHub issue tracker`_.
+If you would like to contribute to the code or documentation, please check the :ref:`contributor_guide`.
+
+.. _GitHub issue tracker: https://github.com/cblearn/cblearn/issues
+
+
+
+
.. toctree::
:maxdepth: 2
- :caption: Contents:
+ :caption: Contents of the documentation:
- install
+ getting_started/index
user_guide/index
generated_examples/index.rst
references/index
diff --git a/docs/install.rst b/docs/install.rst
deleted file mode 100644
index 8337293..0000000
--- a/docs/install.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-============
-Installation
-============
-
-``cblearn`` requires Python 3.9 or newer.
-We recommend using Anaconda_ to install Python and
-dependencies in separated environments.
-The package is mainly tested on Linux, but Windows and Mac OS should work, too.
-
-.. _Anaconda: https://docs.anaconda.com/anaconda/install/
-
-.. code-block:: bash
-
- conda create -n cblearn python==3.9
- conda activate cblearn
-
-
------------------
-User Installation
------------------
-
-``cblearn`` and can be installed using `pip`:
-
-.. code-block:: bash
-
- pip install cblearn
-
-This will install the minimal set of required packages, sufficient for most uses and saving disk space.
-However, some features require more packages that can be installed by adding an ``option`` to the install command.
-
-.. _extras_install:
-
-Extra Requirements
-===================
-
-Extra requirements can be installed by adding an ``option`` to the install command and enable more advanced features.
-Some of those extra dependencies need non-Python packages to be installed first.
-
-.. code-block:: bash
-
- $ pip install cblearn[torch,wrapper] h5py
-
-
-torch
- Most estimators provide an (optional) implementation using ``pytorch`` to run large datasets on CPU and GPU.
- This requires the ``pytorch`` package to be installed `manually `_
- or by adding the ``torch``` option to the install command.
- Note that ``pytorch`` might need about 1GB of disk space.
-
-wrapper
- The estimators in :ref:`references_embedding_wrapper` provide an Python interface to the original implementation
- in ``R``-lang.
- This requires the ``rpy2`` package to be installed by adding the ``wrapper`` option to the install command.
- Additionally, this requires an installed ``R`` interpreter whit must available be in the ``PATH`` environment variable.
- The ``R`` packages are installed automatically upon the first use of the estimators.
-
-h5py
- The function :func:`cblearn.datasets.fetch_imagenet_similarity` requires the ``h5py`` package to load the dataset.
- This can package can be installed with pip.
- Note that some platforms require additionally the ``hdf5`` libraries to be installed
- `manually `_.
-
-
-.. _developer_install:
-
-------------------------
-Contributor Installation
-------------------------
-
-
-If you want to make changes to the code or documentation, you should
-first download the repository and install the project in developer mode with
-developer dependencies. This way, changes in the code are directly considered without the need for re-installation.
-Additionally, packages required to run the tests and build the documentation are installed.
-
-.. code-block:: bash
-
- $ git clone git@github.com:cblearn/cblearn.git
- $ cd cblearn
- $ pip install -e.[tests,docs,torch,wrapper]
-
-The ``-e`` option installs the package in developer mode such that changes in the code are considered directly without re-installation.
-
-tests
- To run the unit tests, the ``pytest`` package is required, which
- can be installed by adding the ``tests`` option to the install command.
-
-docs
- Building these docs requires the ``sphinx`` package, which can be installed by adding the `docs` option to the install command.
-
-
-Now you can run the tests and build the documentation:
-
-.. code-block:: bash
-
- $ python -m pytest --remote-data # should run all tests; this can take a while.
-
- $ cd docs
- $ make html # should generate docs/_build/html/index.html
\ No newline at end of file
diff --git a/docs/logo-dark.png b/docs/logo-dark.png
new file mode 100644
index 0000000..f08c178
Binary files /dev/null and b/docs/logo-dark.png differ
diff --git a/docs/logo-dark.svg b/docs/logo-dark.svg
new file mode 100644
index 0000000..b200c36
--- /dev/null
+++ b/docs/logo-dark.svg
@@ -0,0 +1,193 @@
+
+
+
+
diff --git a/docs/logo-light.png b/docs/logo-light.png
new file mode 100644
index 0000000..c36cf19
Binary files /dev/null and b/docs/logo-light.png differ
diff --git a/docs/logo-light.svg b/docs/logo-light.svg
new file mode 100644
index 0000000..0252a4a
--- /dev/null
+++ b/docs/logo-light.svg
@@ -0,0 +1,193 @@
+
+
+
+
diff --git a/docs/references/index.rst b/docs/references/index.rst
index 2f13459..382c418 100644
--- a/docs/references/index.rst
+++ b/docs/references/index.rst
@@ -90,13 +90,13 @@ Low-level Dataset Utility
Utility
-------
-.. currentmodule:: cblearn.embedding
+.. currentmodule:: cblearn
.. autosummary::
:toctree: generated/
- estimate_dimensionality_cv
- DimensionEstimationResult
+ embedding.estimate_dimensionality_cv
+ embedding.DimensionEstimationResult
.. _references_embedding_wrapper:
@@ -140,7 +140,7 @@ Wrapper
metrics.query_accuracy
metrics.query_error
metrics.procrustes_distance
- metrics.QueryScorer
+ metrics.query_accuracy_scorer
:mod:`cblearn.preprocessing` Preprocessing
==========================================
diff --git a/docs/sg_execution_times.rst b/docs/sg_execution_times.rst
new file mode 100644
index 0000000..e520a8b
--- /dev/null
+++ b/docs/sg_execution_times.rst
@@ -0,0 +1,46 @@
+
+:orphan:
+
+.. _sphx_glr_sg_execution_times:
+
+
+Computation times
+=================
+**00:55.561** total execution time for 4 files **from all galleries**:
+
+.. container::
+
+ .. raw:: html
+
+
+
+
+
+
+
+ .. list-table::
+ :header-rows: 1
+ :class: table table-striped sg-datatable
+
+ * - Example
+ - Time
+ - Mem (MB)
+ * - :ref:`sphx_glr_generated_examples_plot_psychophysical_scales.py` (``../examples/plot_psychophysical_scales.py``)
+ - 00:28.124
+ - 0.0
+ * - :ref:`sphx_glr_generated_examples_ordinal_embedding.py` (``../examples/ordinal_embedding.py``)
+ - 00:11.776
+ - 0.0
+ * - :ref:`sphx_glr_generated_examples_triplet_formats.py` (``../examples/triplet_formats.py``)
+ - 00:10.988
+ - 0.0
+ * - :ref:`sphx_glr_generated_examples_small_embedding_benchmark.py` (``../examples/small_embedding_benchmark.py``)
+ - 00:04.673
+ - 0.0
diff --git a/docs/user_guide/adam_lr_triplet.png b/docs/user_guide/adam_lr_triplet.png
new file mode 100644
index 0000000..cbc009d
Binary files /dev/null and b/docs/user_guide/adam_lr_triplet.png differ
diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst
index 8deaf1f..a65012f 100644
--- a/docs/user_guide/index.rst
+++ b/docs/user_guide/index.rst
@@ -1,11 +1,16 @@
+.. _user_guide:
+
==========
User Guide
==========
Most Machine Learning algorithms use numerical training data (features) for inference,
either representing points in a Euclidean space, similarities, or distances.
-The are settings, e.g. in human studies, when metric points are not available but only ordinal comparisons.
-Comparison-based learning algorithms are the machine learning algorithms applicable in this setting.
+There are settings, e.g., in human studies, where obtaining featurized data is difficult but collecting comparisons is straightforward.
+Comparison-based learning algorithms are the machine learning algorithms applicable in this setting,
+as they learn from ordinal comparisons of object similarities.
+
+.. _triplet_comparison:
-------------------
Triplet comparisons
@@ -20,10 +25,64 @@ inequality:
\delta(x_i, x_j) \le \delta(x_i, x_k).
-This library supports two representation formats of triplets in an array or a sparse matrix form.
-The array form uses 2d ``numpy`` arrays representing a triplet per row and columns for ``i,j,k``.
-Alternatively to the ordering, an additional response array containing 1 or -1 can specify if ``(i,j,k)`` is correct or wrong.
-The sparse matrix is an alternative representation, where triplets are naturally specified as the matrix indices, containing entries 1 or -1.
+Triplets can be represented as 2d ``numpy`` arrays, where each row represents a triplet and the columns represent the indices of the objects.
+In the library, we call this representation the ordered list format or ``'list-order'``.
+
+.. code-block:: python
+
+ import numpy as np
+ triplet_responses = np.array([[0, 1, 2],  # object 0 is closer to object 1 than to object 2
+ [2, 3, 1],
+ [1, 3, 0]])
+
+
+In some applications, it is helpful to have a separate array containing the response for each triplet.
+This array contains 1 for correct triplets and -1 for incorrect triplets.
+The following arrays in the ``'list-count'`` format are equivalent to the previous example.
+
+.. code-block:: python
+
+ triplets = np.array([[0, 1, 2],
+ [2, 1, 3],
+ [1, 0, 3]])
+ responses = np.array([1, -1, -1])
+
+These array-based formats are handy when interacting with other machine learning procedures, as every row represents a "data point".
+Therefore, it is straightforward to sample triplets or to split the data into training and test sets.
+
+.. code-block:: python
+
+ from sklearn.model_selection import train_test_split
+ X_train, X_test, y_train, y_test = train_test_split(triplets, responses, test_size=0.2)
+
+However, if you want to compute directly with triplet data, the ``'sparse'`` representation is more suitable.
+Here, we use a sparse array with three dimensions, each with as many entries as there are objects.
+Based on their position in the array, entries of 1 and -1 encode whether the triplet is correct or incorrect.
+
+.. code-block:: python
+
+ import sparse
+ sp_triplets = sparse.COO(triplets.T, responses, shape=(4, 4, 4))
+ sp_triplets[0, 1, 2] # returns 1
+ sp_triplets[2, 1, 3] # returns -1
+ sp_triplets[1, 0, 3] # returns -1
+ sp_triplets[0, 1, 3] # returns 0
+
+ new_sp_triplets = sp_triplets + another_sp_triplets # element-wise addition to join two datasets
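+
+Converting back to the array formats is cheap because the sparse array stores
+exactly the indices and responses. A minimal sketch using the ``sparse``
+package's ``coords`` and ``data`` attributes:
+
+.. code-block:: python
+
+    # recover the 'list-count' arrays from the sparse representation
+    triplets_again = sp_triplets.coords.T  # shape (n_triplets, 3)
+    responses_again = sp_triplets.data     # entries 1 or -1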
+
+
+The triplet-based algorithms in the ``cblearn`` library can typically handle all of the previous formats.
+An ordinal embedding estimator, for example, can be trained with either of the following:
+
+.. code-block:: python
+
+ from cblearn.embedding import SOE
+
+ soe = SOE(n_components=2)
+ soe.fit(triplet_responses)
+ soe.fit(triplets, responses)
+ soe.fit(sp_triplets)
+
--------------------------
Scikit-learn compatibility
@@ -33,7 +92,7 @@ All estimators in this library are compatible with the ``scikit-learn`` API and
if comparisons are represented in the array format.
The ``scikit-learn`` compatibility is achieved by implementing the ``fit``, ``predict``, and ``score`` methods of the ``BaseEstimator`` class.
-The ``fit`` method is used to train the model, the ``predict`` method is used to predict the labels of the test data,
+The ``fit`` method is used to train the model; the ``predict`` method is used to predict the labels of the test data,
and the ``score`` method is used to evaluate the model on the test data.
In the case of ordinal embedding, for example, the ``predict`` method returns the triplet response according to the embedding
and the ``score`` method returns the triplet accuracy (the fraction of correct triplet responses).
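+
+Because of this compatibility, model selection utilities from ``scikit-learn`` can be
+used directly. A minimal sketch, assuming ``triplets`` is an array in the
+``'list-order'`` format with enough rows for the folds:
+
+.. code-block:: python
+
+    from sklearn.model_selection import cross_val_score
+    from cblearn.embedding import SOE
+
+    # 5-fold cross-validated triplet accuracy of an ordinal embedding
+    scores = cross_val_score(SOE(n_components=2), triplets, cv=5)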
@@ -45,18 +104,47 @@ Pytorch backend (CPU/GPU)
-------------------------
The default backend for computations is the ``scipy`` stack, optimized for fast CPU computations and minimal overhead in both compute and disk space.
-However, this comes with limitations when implementing new methods and for calculations with very large data sets.
+However, this comes with limitations when implementing new methods and for calculations with extensive data sets.
As an alternative for some estimators, a ``pytorch`` implementation exists.
- To use this implementation, ``pytorch`` must be installed (see :ref:`extras_install`) and, if necessary,
- the option ``backend='torch'`` must be set (see the respective function documentation).
-These estimators take care automatically of the data transfer between numpy and torch (internal data representation) and
-use a batched optimizer for faster convergence. If a CUDA GPU is available, the computations are automatically performed on the GPU.
+These implementations make use of the ``pytorch`` library in multiple ways:
+First, the data representation is internally transformed to ``pytorch`` tensors, which allows for automatic differentiation (simplifying the implementation of new loss functions).
+Second, the whole computation can run on a GPU, if available.
+Third, the stochastic optimization algorithms (we use Adam as the default) are batched,
+which means that during each optimization iteration, just a fraction of the comparisons is used. This stochastic approach can significantly speed up the optimization
+in the case of extensive datasets. For smaller datasets, the ``pytorch`` implementation is not necessarily faster than the ``scipy`` implementation.
+On the contrary, starting the optimization incurs a certain runtime overhead.
+The classic second-order optimizers (which we use in the ``scipy`` backend) converge faster if all data is used in each iteration.
+In addition, installing ``pytorch`` requires up to 1GB of hard disk space, which is unnecessary for the ``scipy`` backend.
+
+
+To use this implementation, ``pytorch`` must be installed (see :ref:`extras_install`) and, if necessary,
+the option ``backend='torch'`` must be set when instantiating an estimator (see the respective class documentation).
+If a CUDA GPU is available, the computations are automatically performed on the GPU (to force the computation to run on a CPU,
+set the ``device`` argument).
+
+.. code-block:: python
+
+ from cblearn import embedding, datasets
+
+ X, y = datasets.fetch_material_similarity(return_triplets=True)
+ soe = embedding.SOE(n_components=2, backend='torch', device='cpu')
+ soe.fit(X, y)  # this might run for a while
+ print(soe.embedding_)
+
+The `pytorch` backend uses the Adam optimizer by default, which is relatively sensitive to
+the learning rate parameter. For peak performance, the learning rate should be tuned to the specific problem,
+for example by using a `grid search`_.
+
+.. _`grid search`: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
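+
+A minimal sketch of such a search (hypothetical parameter grid; whether the torch
+backend exposes a ``learning_rate`` parameter should be checked in the respective
+class documentation):
+
+.. code-block:: python
+
+    from sklearn.model_selection import GridSearchCV
+    from cblearn.embedding import SOE
+
+    # search the learning rate that maximizes the triplet accuracy
+    search = GridSearchCV(
+        SOE(n_components=2, backend='torch'),
+        param_grid={'learning_rate': [0.1, 1, 10]},
+        cv=3,
+    )
+    search.fit(X, y)
+    print(search.best_params_)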
-``pytorch`` itself needs a lot of hard disk space and starting the optimization has a certain overhead
-(automatic derivation, data transformation).
- It is therefore advisable to use the ``scipy`` backend by default and only change if necessary.
+.. figure:: ./adam_lr_triplet.png
+ :align: center
+ :width: 300px
+ :alt: Pytorch backend
+
+ The Adam optimizer's runtime and error depend highly on the learning rate hyperparameter.
+ Here, we show this dependence with a minimal CKL implementation using PyTorch on the :ref:`Vogue Dataset ` (60 objects).
-------------------------
diff --git a/docs/user_guide/torch_speedtest_triplets.png b/docs/user_guide/torch_speedtest_triplets.png
new file mode 100644
index 0000000..041ee15
Binary files /dev/null and b/docs/user_guide/torch_speedtest_triplets.png differ
diff --git a/examples/README.rst b/examples/README.rst
index 8a810fb..e4e909e 100644
--- a/examples/README.rst
+++ b/examples/README.rst
@@ -1,3 +1,5 @@
+.. _examples:
+
Examples
========
diff --git a/examples/ordinal_embedding.py b/examples/ordinal_embedding.py
index 9e27155..5188089 100644
--- a/examples/ordinal_embedding.py
+++ b/examples/ordinal_embedding.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
r"""
.. _example_ordinal_embedding:
+
Ordinal Embedding
=================
diff --git a/paper/.gitignore b/paper/.gitignore
new file mode 100644
index 0000000..daff702
--- /dev/null
+++ b/paper/.gitignore
@@ -0,0 +1,2 @@
+*.pdf
+*.pdf+
diff --git a/paper/images/adam_lr.pdf b/paper/images/adam_lr.pdf
new file mode 100644
index 0000000..45b5f1d
Binary files /dev/null and b/paper/images/adam_lr.pdf differ
diff --git a/paper/images/adam_lr_triplet.pdf b/paper/images/adam_lr_triplet.pdf
new file mode 100644
index 0000000..8670afe
Binary files /dev/null and b/paper/images/adam_lr_triplet.pdf differ
diff --git a/paper/images/car_example.pdf b/paper/images/car_example.pdf
new file mode 100644
index 0000000..282e1f9
Binary files /dev/null and b/paper/images/car_example.pdf differ
diff --git a/paper/images/datasets.pdf b/paper/images/datasets.pdf
new file mode 100644
index 0000000..1643385
Binary files /dev/null and b/paper/images/datasets.pdf differ
diff --git a/paper/images/deltaerror-per-algorithm_cblearn-all.pdf b/paper/images/deltaerror-per-algorithm_cblearn-all.pdf
new file mode 100644
index 0000000..d49f727
Binary files /dev/null and b/paper/images/deltaerror-per-algorithm_cblearn-all.pdf differ
diff --git a/paper/images/deltaerror-per-algorithm_library.pdf b/paper/images/deltaerror-per-algorithm_library.pdf
new file mode 100644
index 0000000..ecfd273
Binary files /dev/null and b/paper/images/deltaerror-per-algorithm_library.pdf differ
diff --git a/paper/images/deltatime-per-algorithm_cblearn-all.pdf b/paper/images/deltatime-per-algorithm_cblearn-all.pdf
new file mode 100644
index 0000000..8e3c8dc
Binary files /dev/null and b/paper/images/deltatime-per-algorithm_cblearn-all.pdf differ
diff --git a/paper/images/deltatime-per-algorithm_library.pdf b/paper/images/deltatime-per-algorithm_library.pdf
new file mode 100644
index 0000000..4acdbe2
Binary files /dev/null and b/paper/images/deltatime-per-algorithm_library.pdf differ
diff --git a/paper/images/time-per-triplets_gpu.pdf b/paper/images/time-per-triplets_gpu.pdf
new file mode 100644
index 0000000..4b8fac9
Binary files /dev/null and b/paper/images/time-per-triplets_gpu.pdf differ
diff --git a/paper/images/torch_speedtest.pdf b/paper/images/torch_speedtest.pdf
new file mode 100644
index 0000000..269616d
Binary files /dev/null and b/paper/images/torch_speedtest.pdf differ
diff --git a/paper/images/torch_speedtest_triplets.pdf b/paper/images/torch_speedtest_triplets.pdf
new file mode 100644
index 0000000..67beeab
Binary files /dev/null and b/paper/images/torch_speedtest_triplets.pdf differ
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..8feaf40
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,137 @@
+---
+title: 'cblearn: Comparison-based Machine Learning in Python'
+tags:
+ - Python
+ - Machine Learning
+ - Comparison-based Learning
+ - Ordinal Embedding
+ - Triplets
+ - Behaviour
+ - Clustering
+ - Psychology
+ - Psychophysics
+ - Scaling
+authors:
+ - name: David-Elias Künstle
+ orcid: 0000-0001-5507-3731
+ corresponding: true
+ affiliation: "1, 2"
+ - given-names: Ulrike
+ dropping-particle: von
+ surname: Luxburg
+ affiliation: "1, 2"
+affiliations:
+ - name: University of Tübingen, Germany
+ index: 1
+ - name: Tübingen AI Center, Germany
+ index: 2
+date: 22 September 2023
+bibliography: references.bib
+---
+
+# Summary
+
+The `cblearn` package implements comparison-based machine learning algorithms
+and routines to process comparison-based data in Python.
+Comparison-based learning algorithms are used when only comparisons of similarity between data points are available, but no explicit similarity scores or features.
+For example, humans struggle to assign *numeric* similarities to apples, pears, and bananas.
+Still, they can easily *compare* the similarity of pears and apples with the similarity of apples and bananas---pears and apples usually appear more similar.
+There exist comparison-based algorithms for most machine learning tasks,
+like clustering, regression, or classification [e.g., @balcan2016learning; @heikinheimo2013crowd; @perrot_near-optimal_2020].
+The most frequently applied algorithms, however, are the so-called ordinal embedding algorithms
+[e.g., @agarwal_generalized_2007; @tamuz_adaptively_2011; @van_der_maaten_stochastic_2012; @terada_local_2014;
+@amid2015; @anderton2019scaling; @ghosh2019landmark].
+Ordinal embedding algorithms estimate a metric representation, such that the distances between embedded objects reflect the similarity comparisons.
+These embedding algorithms have recently come into fashion in psychology and cognitive science to quantify the perceived similarity of
+various stimuli objectively
+[e.g., @haghiri_estimation_2020; @wills_toward_2009; @roads_obtaining_2019].
+
+
+
+# Statement of need
+
+This work presents `cblearn`, an open-source Python package for comparison-based learning. Unlike related packages,
+`cblearn` goes beyond specific algorithm implementations to provide an ecosystem for comparison-based data with access to several real-world datasets
+and a collection of algorithm implementations.
+`cblearn` is fast and user-friendly for applications but flexible for research on new algorithms and methods.
+The package integrates well into the scientific Python ecosystem; for example, third-party functions for cross-validation or hyperparameter tuning of `scikit-learn` estimators can typically be used with `cblearn` estimators.
+Although our package is relatively new, it has already been used for algorithm development [@mandal2023revenue] and data analysis in several studies [@schonmann_using_2022; @kunstle_estimating_2022; @van_assen_identifying_2022; @zhao2023perceiving; @fsauerObjectiveMeasurementApproach2024; @huber2024tracing].
+
+We designed `cblearn` as a modular package with functions
+for processing and converting the comparison data in all its varieties (`cblearn.preprocessing`, `cblearn.utils`, `cblearn.metrics`), routines to generate artificial or load real-world datasets (`cblearn.datasets`), and algorithms for ordinal embedding and clustering (`cblearn.embedding`, `cblearn.cluster`).
+
+## Various data formats supported
+
+The atomic datum in comparison-based learning is the quadruplet,
+a comparison of the similarity $\delta$ between two pairs $(i, j)$ and $(k, l)$,
+for example, asserting that $\delta(i, j) < \delta(k, l)$.
+Another popular comparison query, the triplet, can be reduced to a quadruplet with $i = l$.
+Comparison-based learning algorithms estimate classes, clusters, or metrics to fulfill as many quadruplets as possible.
+In ordinal embedding, for example, the problem is to find $x_i, x_j, x_k, x_l \in \mathbb{R}^d$
+s.t. $\left\lVert x_i - x_j \right\rVert_2 < \left\lVert x_k - x_l \right\rVert_2 \Leftrightarrow \delta(i, j) < \delta(k, l)$.
+
+Besides triplets and quadruplets, there are many ways to ask for comparisons.
+Some tasks ask for the "odd-one-out", the "most-central" object, or the two most similar objects to a reference. `cblearn` can load these different queries and convert them to triplets, ready for subsequent embedding or clustering tasks.
+
+Different data types can store triplets and `cblearn` converts them internally.
+A 2D array with three columns for the object indices $(i, j, k)$ stores a triplet per row. In some applications, it is convenient to separate the comparison "question" and "response", which leads to an additional list of response labels that are $1$, if $\delta(i, j) \le \delta(i, k)$, and $-1$, if $\delta(i, j) > \delta(i, k)$.
+An alternative format stores triplets as a 3-dimensional sparse array.
+These sparse arrays convert quickly back and forth to dense 2D arrays while providing an intuitive comparison representation via multidimensional indexing. For example, the identical triplet can be represented as `[[i, j, k]]`, `([[i, k, j]], [-1])`, or `sparse_arr[i, j, k] == 1`.
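+
+The following minimal sketch illustrates these three equivalent representations
+(using the `sparse` package for the multidimensional array; the index values are exemplary):
+
+```python
+import numpy as np
+import sparse
+
+i, j, k = 0, 1, 2                                     # delta(0, 1) < delta(0, 2)
+list_order = np.array([[i, j, k]])                    # ordered columns
+list_count = (np.array([[i, k, j]]), np.array([-1]))  # question + response
+sparse_arr = sparse.COO(list_order.T, np.array([1]), shape=(3, 3, 3))
+assert sparse_arr[i, j, k] == 1
+```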
+
+
+## Interfaces to diverse datasets
+
+There is no Iris, CIFAR, or ImageNet in comparison-based learning---the community lacks accessible real-world datasets to evaluate new algorithms.
+`cblearn` provides access to various real-world datasets, summarized in \autoref{fig:datasets}, with functions to download and load the comparisons.
+These datasets---typically comparisons between images or words---consist of human responses.
+Additionally, our package provides preprocessing functions to convert different comparisons to triplets or quadruplets, which many algorithms expect.
+
+![Overview of the real-world datasets accessible through `cblearn`.\label{fig:datasets}](images/datasets.pdf){ width=35% }
+
+## Algorithms implemented for CPU and GPU
+
+In the current version `0.3.0`, `cblearn` implements an extensive palette of ordinal embedding algorithms and a clustering algorithm (\autoref{tablealgorithms}); additional algorithms can be contributed easily to the modular design.
+Most algorithm implementations are built with the scientific ecosystem around `scipy` [@virtanenSciPyFundamentalAlgorithms2020;@harris_array_2020] to be fast and lightweight. Inspired by the work of @vankadara_insights_2020, we added GPU implementations with `torch` [@paszke2019pytorch;@anselPyTorchFasterMachine2024] that use stochastic optimization routines known from deep learning methods.
+These GPU implementations can be used with large datasets and rapidly adapted thanks to `torch`'s automated differentiation methods.
+
+: Algorithm implementations in `cblearn`. Most of these come in multiple variants: Different backends for small datasets on CPU and large datasets on GPU as well as variations of objective functions. \label{tablealgorithms}
+
+|Algorithm | Reference |
+|:---------|:---------------|
+|Crowd Kernel Learning | [@tamuz_adaptively_2011] |
+|Fast Ordinal Triplet Embedding | [@jain_finite_2016] |
+|Generalized Non-metric MDS | [@agarwal_generalized_2007] |
+|Maximum-likelihood Difference Scaling | [@maloney_maximum_2003] |
+|Soft Ordinal Embedding | [@terada_local_2014] |
+|Ordinal Embedding Neural Network | [@vankadara_insights_2020] |
+|Stochastic Triplet Embedding | [@van_der_maaten_stochastic_2012] |
+|ComparisonHC (clustering) | [@perrot_near-optimal_2020] |
+
+
+## User-friendly and compatible API
+One of Python's greatest strengths is the scientific ecosystem, into which `cblearn` integrates. Our package does not only make use of this ecosystem internally but also adopts its API conventions---every user of `scikit-learn` [@pedregosa_scikit-learn_2011;@buitinck_api_2013] is already familiar with the API of `cblearn`:
+Estimator objects use the well-known `scikit-learn` methods `.fit(X, y)`, `.transform(X)`, and `.predict(X)`. This convention allows the use of many routines from the `scikit-learn` ecosystem with `cblearn`'s estimators while representing comparisons as `numpy` arrays [@harris_array_2020].
+Interested readers can find a code example in the [Supplementary Material](https://github.com/cblearn/cblearn/blob/joss/paper/supplementary.pdf), which shows in just four lines how to fetch a real-world dataset, preprocess the data, estimate an embedding, and cross-validate the fit. More examples are available in the package's documentation.
+
+# Related work and empirical comparison
+
+Most comparison-based learning algorithms were implemented independently as part of a research paper [e.g., @ghoshdastidar_foundations_2019; @hebartRevealingMultidimensionalMental2020; @van_der_maaten_stochastic_2012; @roads_obtaining_2019].
+Just a few of these implementations, for example `loe` [@terada_local_2014] or `psiz` [@roads_obtaining_2019], come in the form of software packages.
+
+Related packages with collections of comparison-based learning algorithms have a focus on metric learning and crowd-sourced data collection. `metric-learn` [@metric-learn] provides a collection of methods to determine the distance metric from similarity data, including triplets and quadruplets, in a `scikit-learn` compatible API. Data collection packages
+like `NEXT` [@NIPS2015_89ae0fe2] and `salmon` [@Sievert2023] provide active ordinal embedding algorithms to select the most informative comparisons in an experiment efficiently.
+Our package `cblearn`, on the other hand, focuses on providing comparison data and interoperable estimator implementations for the remaining areas of comparison-based learning.
+
+A small empirical comparison to third-party packages shows that `cblearn`'s algorithm implementations
+are typically accurate and fast; details are described in the [Supplementary Material](https://github.com/cblearn/cblearn/blob/joss/paper/supplementary.pdf).
+A more comprehensive evaluation of various ordinal embedding algorithms per se, focusing on large data sets, can be found in @vankadara_insights_2020.
+
+# Acknowledgements
+We want to thank Debarghya Ghoshdastidar, Leena Vankadara, Siavash Haghiri, Michael Lohaus, and especially Michaël Perrot for the inspiring discussions about comparison-based learning in general and the `cblearn` package in particular.
+Additionally, we thank Thomas Klein for the helpful feedback on this manuscript and Alexander Conzelmann for the contributions to the `cblearn.cluster` module.
+The paper, code, and documentation profited considerably from the feedback of the JOSS editor and reviewers.
+
+This work was funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under Germany’s Excellence Strategy – EXC number 2064/1 – Project number 390727645.
+The authors thank the International Max Planck Research School for Intelligent Systems (IMPRS-IS) for supporting David-Elias Künstle.
+
+# References
diff --git a/paper/references.bib b/paper/references.bib
new file mode 100644
index 0000000..536b042
--- /dev/null
+++ b/paper/references.bib
@@ -0,0 +1,532 @@
+@article{metric-learn,
+ title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython},
+ author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and
+ {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien},
+ journal = {Journal of Machine Learning Research},
+ year = {2020},
+ volume = {21},
+ number = {138},
+ pages = {1--6}
+}
+
+@article{fsauerObjectiveMeasurementApproach2024,
+ title = {An Objective Measurement Approach to Quantify the Perceived Distortions of Spectacle Lenses},
+ author = {Sauer, Yannick and K{\"u}nstle, David-Elias and Wichmann, Felix A. and Wahl, Siegfried},
+ year = {2024},
+ month = feb,
+ journal = {Scientific Reports},
+ volume = {14},
+ number = {1},
+ pages = {3967},
+ issn = {2045-2322},
+ doi = {10.1038/s41598-024-54368-3},
+ urldate = {2024-02-19},
+ copyright = {All rights reserved},
+ langid = {english},
+}
+@article{huber2024tracing,
+ title={Tracing Truth Through Conceptual Scaling: Mapping People’s Understanding of Abstract Concepts},
+ author={Huber, Lukas S and K{\"u}nstle, David-Elias and Reuter, Kevin},
+ year={2024},
+ publisher={PsyArXiv},
+ doi={10.31234/osf.io/c42yr}
+}
+
+
+@article{Sievert2023,
+  title = {Efficiently Learning Relative Similarity Embeddings with Crowdsourcing},
+  author = {Scott Sievert and Robert Nowak and Timothy Rogers},
+  journal = {Journal of Open Source Software},
+  year = {2023},
+  publisher = {The Open Journal},
+  volume = {8},
+  number = {84},
+  pages = {4517},
+  doi = {10.21105/joss.04517},
+  url = {https://doi.org/10.21105/joss.04517}
+}
+
+@inproceedings{NIPS2015_89ae0fe2,
+ author = {Jamieson, Kevin G and Jain, Lalit and Fernandez, Chris and Glattard, Nicholas J. and Nowak, Rob},
+ booktitle = {Advances in Neural Information Processing Systems},
+ editor = {C. Cortes and N. Lawrence and D. Lee and M. Sugiyama and R. Garnett},
+ pages = {},
+ publisher = {Curran Associates, Inc.},
+ title = {NEXT: A System for Real-World Development, Evaluation, and Application of Active Learning},
+ url = {https://proceedings.neurips.cc/paper_files/paper/2015/file/89ae0fe22c47d374bc9350ef99e01685-Paper.pdf},
+ volume = {28},
+ year = {2015}
+}
+
+
+@article{vankadara_insights_2020,
+ title = {Insights into {Ordinal} {Embedding} {Algorithms}: {A} {Systematic} {Evaluation}},
+ shorttitle = {Insights into {Ordinal} {Embedding} {Algorithms}},
+ abstract = {The objective of ordinal embedding is to find a Euclidean representation of a set of abstract items, using only answers to triplet comparisons of the form "Is item \$i\$ closer to the item \$j\$ or item \$k\$?". In recent years, numerous algorithms have been proposed to solve this problem. However, there does not exist a fair and thorough assessment of these embedding methods and therefore several key questions remain unanswered: Which algorithms scale better with increasing sample size or dimension? Which ones perform better when the embedding dimension is small or few triplet comparisons are available? In our paper, we address these questions and provide the first comprehensive and systematic empirical evaluation of existing algorithms as well as a new neural network approach. In the large triplet regime, we find that simple, relatively unknown, non-convex methods consistently outperform all other algorithms, including elaborate approaches based on neural networks or landmark approaches. This finding can be explained by our insight that many of the non-convex optimization approaches do not suffer from local optima. In the low triplet regime, our neural network approach is either competitive or significantly outperforms all the other methods. Our comprehensive assessment is enabled by our unified library of popular embedding algorithms that leverages GPU resources and allows for fast and accurate embeddings of millions of data points.},
+ journal = {arXiv:1912.01666 [cs, stat]},
+ author = {Vankadara, Leena Chennuru and Haghiri, Siavash and Lohaus, Michael and Wahab, Faiz Ul and Luxburg, Ulrike},
+ year = {2021},
+ keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
+ doi = {10.48550/arXiv.1912.01666}
+}
+
+@article{van_assen_identifying_2022,
+ title = {Identifying the behavioural cues of collective flow perception},
+ volume = {22},
+ abstract = {In this study we investigate the visual perception of collective flow. Collective flow depicts agents that show both individual and group behaviour following a relatively simple set of rules (e.g., swarms of insects, flocks of sheep, cyclists in traffic). These collective patterns occur all around us in animate and inanimate systems and on microscopic and macroscopic scales. Ecologically, it can be argued that the human visual system must have developed certain sensitivities for these collective patterns. Even from very low-level depictions we can identify high-level behaviours (e.g., stress, cooperation, leadership), associate these patterns with specific animal groups, and predict future states of these complex patterns. These are skills that potentially generalize to many cognitively demanding tasks. To investigate this, we developed an online engine that simulates biological collective behaviour using six parameters. Here, we concentrate on zone of alignment, zone of attraction, and turning rate. We collected two types of data: 1. A triplet similarity task i.e., which pair of stimuli is more similar, 2. Rating tasks for ten behavioural attributes selected using online experiments and brainstorm sessions. The triplet task was not easy where 38\% of the trials can be considered hard (high intraobserver variability). Using Soft Ordinal Embeddings (SOE) we found that the similarity space is two-dimensional. One of these dimensions is highly correlated with the turning rate, and with nine of the ten behavioural attributes e.g., grouping, cooperation, focus. However, with the attributes explored here we were not able to clearly identify the second dimension of the similarity space. The dominant correlations with the turning rate seem to overshadow intriguing, more subtle non-linear tendencies of the behavioural ratings. In this study we applied a range of methods that allowed us to increase understanding and identify behavioural cues we employ to perceive the versatile space of collective flow.},
+ number = {14},
+ journal = {Journal of Vision},
+ author = {van Assen, Jan Jaap R. and Pont, Sylvia C.},
+ year = {2022},
+ pages = {3985},
+ doi = {10.1167/jov.22.14.3985}
+}
+
+@article{kunstle_estimating_2022,
+ title = {Estimating the perceived dimension of psychophysical stimuli using triplet accuracy and hypothesis testing},
+ volume = {22},
+ copyright = {All rights reserved},
+ abstract = {Vision researchers are interested in mapping complex physical stimuli to perceptual dimensions. Such a mapping can be constructed using multidimensional psychophysical scaling or ordinaljt4 embedding methods. Both methods infer coordinates that agree as much as possible with the observer’s judgments so that perceived similarity corresponds with distance in the inferred space. However, a fundamental problem of all methods that construct scalings in multiple dimensions is that the inferred representation can only reflect perception if the scale has the correct dimension. Here we propose a statistical procedure to overcome this limitation. The critical elements of our procedure are i) measuring the scale’s quality by the number of correctly predicted triplets and ii) performing a statistical test to assess if adding another dimension to the scale improves triplet accuracy significantly. We validate our procedure through extensive simulations. In addition, we study the properties and limitations of our procedure using “real” data from various behavioral datasets from psychophysical experiments. We conclude that our procedure can reliably identify (a lower bound on) the number of perceptual dimensions for a given dataset.},
+ number = {13},
+ journal = {Journal of Vision},
+ author = {Künstle, David-Elias and Luxburg, Ulrike and Wichmann, Felix A.},
+ year = {2022},
+ pages = {5},
+ doi = {10.1167/jov.22.13.5}
+}
+
+@article{schonmann_using_2022,
+ title = {Using an {Odd}-{One}-{Out} {Design} {Affects} {Consistency}, {Agreement} and {Decision} {Criteria} in {Similarity} {Judgement} {Tasks} {Involving} {Natural} {Images}.},
+ volume = {22},
+ copyright = {All rights reserved},
+ abstract = {Recently, similarity judgement tasks have been employed to estimate the perceived similarity of natural images (Hebart, Zheng, Pereira, \& Baker, 2020). Such tasks typically take the form of triplet questions in which participants are presented with a reference image and two additional images and are asked to indicate which of the two is more similar to the reference. Alternatively, participants can be presented with three images and asked to indicate the odd one out. Though both questions are mathematically similar, they might affect participants’ decision criteria, the agreement among observers, or the consistency of single observers—these possibilities have hitherto not been assessed. To address these issues, we presented four observers with triplets from three image sets designed to juxtapose different perceptual and conceptual features. Using a soft ordinal embedding algorithm—a machine learning version of a multidimensional scaling—we represented the images in a two-dimensional space such that the Euclidean distances between images reflected observers' choices. Agreement between observers was assessed through a leave-one-out procedure in which embeddings based on three observers served to predict the respective fourth observer's choices. Consistency was calculated as the proportion of identical choices in a repeat session. Here we show that design choices in similarity judgement tasks can indeed affect results. The odd-one-out design resulted in greater embedding accuracy, higher agreement among, and higher consistency within observers. Hence, an individual observer's choices could be better predicted in the odd-one-out than in the triplet design. However, predicting individual responses was only possible for image sets for which participants could report a predominant relationship. Otherwise, predictability dropped to close to chance level. Our results suggest that seemingly innocuous experimental variations—standard triplet versus odd-one-out—can have a strong influence on the resulting perceptual spaces. Furthermore, we note severe limitations regarding the predictive power of models relying on pooled observer data.},
+ number = {14},
+ journal = {Journal of Vision},
+ author = {Schönmann, Inés and Künstle, David-Elias and Wichmann, Felix A.},
+ year = {2022},
+ pages = {3232},
+ doi = {10.1167/jov.22.14.3232}
+}
+
+@inproceedings{roads_enriching_2021,
+ title = {Enriching {ImageNet} with {Human} {Similarity} {Judgments} and {Psychological} {Embeddings}},
+ abstract = {Advances in supervised learning approaches to object recognition flourished in part because of the availability of high-quality datasets and associated benchmarks. However, these benchmarks—such as ILSVRC—are relatively task-specific, focusing predominately on predicting class labels. We introduce a publicly-available dataset that embodies the task-general capabilities of human perception and reasoning. The Human Similarity Judgments extension to ImageNet (ImageNet-HSJ) is composed of a large set of human similarity judgments that supplements the existing ILSVRC validation set. The new dataset supports a range of task and performance metrics, including evaluation of unsupervised algorithms. We demonstrate two methods of assessment: using the similarity judgments directly and using a psychological embedding trained on the similarity judgments. This embedding space contains an order of magnitude more points (i.e., images) than previous efforts based on human judgments. We were able to scale to the full 50,000 image ILSVRC validation set through a selective sampling process that used variational Bayesian inference and model ensembles to sample aspects of the embedding space that were most uncertain. To demonstrate the utility of ImageNet-HSJ, we used the similarity ratings and the embedding space to evaluate how well several popular models conform to human similarity judgments. One finding is that the more complex models that perform better on task-specific benchmarks do not better conform to human semantic judgments. In addition to the human similarity judgments, pre-trained psychological embeddings and code for inferring variational embeddings are made publicly available. ImageNet-HSJ supports the appraisal of internal representations and the development of more humanlike models.},
+ booktitle = {{Conference} on {Computer} {Vision} and {Pattern} {Recognition} ({CVPR})},
+ author = {Roads, Brett D. and Love, Bradley C.},
+ year = {2021},
+ keywords = {Benchmark testing, Computer vision, Focusing, Measurement, Psychology, Semantics, Supervised learning},
+ doi = {10.1109/cvpr46437.2021.00355}
+}
+
+@article{jain_finite_2016,
+ title = {Finite {Sample} {Prediction} and {Recovery} {Bounds} for {Ordinal} {Embedding}},
+ language = {en},
+ journal = {Advances in Neural Information Processing Systems (NeurIPS)},
+ author = {Jain, Lalit and Jamieson, Kevin G. and Nowak, Rob},
+ year = {2016},
+ keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, bounds},
+}
+
+@inproceedings{van_der_maaten_stochastic_2012,
+ title = {Stochastic triplet embedding},
+ abstract = {This paper considers the problem of learning an embedding of data based on similarity triplets of the form “A is more similar to B than to C”. This learning setting is of relevance to scenarios in which we wish to model human judgements on the similarity of objects. We argue that in order to obtain a truthful embedding of the underlying data, it is insufficient for the embedding to satisfy the constraints encoded by the similarity triplets. In particular, we introduce a new technique called t-Distributed Stochastic Triplet Embedding (t-STE) that collapses similar points and repels dissimilar points in the embedding — even when all triplet constraints are satisfied. Our experimental evaluation on three data sets shows that as a result, t-STE is much better than existing techniques at revealing the underlying data structure.},
+ language = {en},
+ booktitle = {{International} {Workshop} on {Machine} {Learning} for {Signal} {Processing}},
+ author = {van der Maaten, Laurens and Weinberger, Kilian},
+ year = {2012},
+ pages = {1--6},
+ doi = {10.1109/MLSP.2012.6349720}
+}
+
+@article{maloney_maximum_2003,
+ title = {Maximum likelihood difference scaling},
+ volume = {3},
+ number = {8},
+ journal = {Journal of Vision},
+ author = {Maloney, Laurence T and Yang, Joong Nam},
+ year = {2003},
+ pages = {5--5},
+ doi = {10.1167/3.8.5}
+}
+
+@InProceedings{agarwal_generalized_2007,
+  title = {Generalized Non-metric Multidimensional Scaling},
+  author = {Agarwal, Sameer and Wills, Josh and Cayton, Lawrence and Lanckriet, Gert and Kriegman, David and Belongie, Serge},
+  booktitle = {Proceedings of the Eleventh International Conference on Artificial Intelligence and Statistics},
+  pages = {11--18},
+  year = {2007},
+  editor = {Meila, Marina and Shen, Xiaotong},
+  volume = {2},
+  series = {Proceedings of Machine Learning Research},
+  address = {San Juan, Puerto Rico},
+  month = {21--24 Mar},
+  publisher = {PMLR},
+  pdf = {http://proceedings.mlr.press/v2/agarwal07a/agarwal07a.pdf},
+  url = {https://proceedings.mlr.press/v2/agarwal07a.html}
+}
+
+
+@inproceedings{terada_local_2014,
+ title = {Local ordinal embedding},
+ booktitle = {International {Conference} on {Machine} {Learning} (ICML)},
+ author = {Terada, Yoshikazu and Luxburg, Ulrike},
+ year = {2014},
+}
+
+@article{wills_toward_2009,
+ title = {Toward a perceptual space for gloss},
+ volume = {28},
+ language = {en},
+ number = {4},
+ journal = {ACM Transactions on Graphics},
+ author = {Wills, Josh and Agarwal, Sameer and Kriegman, David and Belongie, Serge},
+ year = {2009},
+ pages = {1--15},
+ doi = {10.1145/1559755.1559760},
+}
+
+@article{aguilar_comparing_2017,
+ title = {Comparing sensitivity estimates from {MLDS} and forced-choice methods in a slant-from-texture experiment},
+ volume = {17},
+ language = {en},
+ number = {1},
+ journal = {Journal of Vision},
+ author = {Aguilar, Guillermo and Wichmann, Felix A. and Maertens, Marianne},
+ year = {2017},
+ pages = {37},
+ doi = {10.1167/17.1.37},
+}
+
+@article{aguilar_toward_2020,
+ title = {Toward reliable measurements of perceptual scales in multiple contexts},
+ volume = {20},
+ language = {en},
+ number = {4},
+ urldate = {2020-05-18},
+ journal = {Journal of Vision},
+ author = {Aguilar, Guillermo and Maertens, Marianne},
+ year = {2020},
+ pages = {19},
+ doi = {10.1167/jov.20.4.19},
+}
+
+@article{haghiri_estimation_2020,
+ title = {Estimation of perceptual scales using ordinal embedding},
+ volume = {20},
+ abstract = {In this article, we address the problem of measuring and analyzing sensation, the subjective magnitude of one’s experience. We do this in the context of the method of triads: The sensation of the stimulus is evaluated via relative judgments of the following form: “Is stimulus {\textbackslash}(S\_i{\textbackslash}) more similar to stimulus {\textbackslash}(S\_j{\textbackslash}) or to stimulus {\textbackslash}(S\_k{\textbackslash})?” We propose to use ordinal embedding methods from machine learning to estimate the scaling function from the relative judgments. We review two relevant and well-known methods in psychophysics that are partially applicable in our setting: nonmetric multidimensional scaling (NMDS) and the method of maximum likelihood difference scaling (MLDS). Considering various scaling functions, we perform an extensive set of simulations to demonstrate the performance of the ordinal embedding methods. We show that in contrast to existing approaches, our ordinal embedding approach allows, first, to obtain reasonable scaling functions from comparatively few relative judgments and, second, to estimate multidimensional perceptual scales. In addition to the simulations, we analyze data from two real psychophysics experiments using ordinal embedding methods. Our results show that in the one-dimensional perceptual scale, our ordinal embedding approach works as well as MLDS, while in higher dimensions, only our ordinal embedding methods can produce a desirable scaling function. To make our methods widely accessible, we provide an R-implementation and general rules of thumb on how to use ordinal embedding in the context of psychophysics.},
+ number = {9},
+ journal = {Journal of Vision},
+ author = {Haghiri, Siavash and Wichmann, Felix A. and Luxburg, Ulrike},
+ year = {2020},
+ pages = {14},
+ doi = {10.1167/jov.20.9.14},
+}
+
+@article{devinck_common_2012,
+ title = {A common signal detection model accounts for both perception and discrimination of the watercolor effect},
+ volume = {12},
+ issn = {1534-7362},
+ abstract = {Establishing the relation between perception and discrimination is a fundamental objective in psychophysics, with the goal of characterizing the neural mechanisms mediating perception. Here, we show that a procedure for estimating a perceptual scale based on a signal detection model also predicts discrimination performance. We use a recently developed procedure, Maximum Likelihood Difference Scaling (MLDS), to measure the perceptual strength of a long-range, color, filling-in phenomenon, the Watercolor Effect (WCE), as a function of the luminance ratio between the two components of its generating contour. MLDS is based on an equal-variance, Gaussian, signal detection model and yields a perceptual scale with interval properties. The strength of the fill-in percept increased 10–15 times the estimate of the internal noise level for a 3-fold increase in the luminance ratio. Each observer’s estimated scale predicted discrimination performance in a subsequent paired-comparison task. A common signal detection model accounts for both the appearance and discrimination data. Since signal detection theory provides a common metric for relating discrimination performance and neural response, the results have implications for comparing perceptual and neural response functions.},
+ language = {en},
+ journal = {Journal of Vision},
+ author = {Devinck, F. and Knoblauch, K.},
+ year = {2012},
+ pages = {19--19},
+ doi = {10.1167/12.3.19},
+}
+
+@inproceedings{kingma2014adam,
+ author = {Diederik P. Kingma and
+ Jimmy Ba},
+ editor = {Yoshua Bengio and
+ Yann LeCun},
+ title = {Adam: {A} Method for Stochastic Optimization},
+ booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
+ San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
+ year = {2015},
+ url = {http://arxiv.org/abs/1412.6980},
+ timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
+ biburl = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
+ bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+@inproceedings{heikinheimo2013crowd,
+ title={The crowd-median algorithm},
+ author={Heikinheimo, Hannes and Ukkonen, Antti},
+ booktitle={Proceedings of the AAAI Conference on Human Computation and Crowdsourcing},
+ volume={1},
+ pages={69--77},
+ year={2013}
+}
+@article{paszke2019pytorch,
+ title={Pytorch: An imperative style, high-performance deep learning library},
+ author={Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others},
+ journal={Advances in neural information processing systems},
+ volume={32},
+ year={2019}
+}
+@inproceedings{anselPyTorchFasterMachine2024,
+ title = {{{PyTorch}} 2: {{Faster Machine Learning Through Dynamic Python Bytecode Transformation}} and {{Graph Compilation}}},
+ shorttitle = {{{PyTorch}} 2},
+ booktitle = {Proceedings of the 29th {{ACM International Conference}} on {{Architectural Support}} for {{Programming Languages}} and {{Operating Systems}}, {{Volume}} 2},
+ author = {Ansel, Jason and Yang, Edward and He, Horace and Gimelshein, Natalia and Jain, Animesh and Voznesensky, Michael and Bao, Bin and Bell, Peter and Berard, David and Burovski, Evgeni and Chauhan, Geeta and Chourdia, Anjali and Constable, Will and Desmaison, Alban and DeVito, Zachary and Ellison, Elias and Feng, Will and Gong, Jiong and Gschwind, Michael and Hirsh, Brian and Huang, Sherlock and Kalambarkar, Kshiteej and Kirsch, Laurent and Lazos, Michael and Lezcano, Mario and Liang, Yanbo and Liang, Jason and Lu, Yinghai and Luk, C. K. and Maher, Bert and Pan, Yunjie and Puhrsch, Christian and Reso, Matthias and Saroufim, Mark and Siraichi, Marcos Yukio and Suk, Helen and Zhang, Shunting and Suo, Michael and Tillet, Phil and Zhao, Xu and Wang, Eikan and Zhou, Keren and Zou, Richard and Wang, Xiaodong and Mathews, Ajit and Wen, William and Chanan, Gregory and Wu, Peng and Chintala, Soumith},
+ year = {2024},
+ month = apr,
+ pages = {929--947},
+ publisher = {ACM},
+ address = {La Jolla, CA, USA},
+ doi = {10.1145/3620665.3640366},
+ urldate = {2024-05-05},
+ isbn = {9798400703850},
+ langid = {english},
+}
+@article{virtanenSciPyFundamentalAlgorithms2020,
+ title = {{{SciPy}} 1.0: Fundamental Algorithms for Scientific Computing in {{Python}}},
+ shorttitle = {{{SciPy}} 1.0},
+ author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and Haberland, Matt and Reddy, Tyler and Cournapeau, David and Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and {van der Walt}, St{\'e}fan J. and Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and Kern, Robert and Larson, Eric and Carey, C. J. and Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and VanderPlas, Jake and Laxalde, Denis and Perktold, Josef and Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and Harris, Charles R. and Archibald, Anne M. and Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and {van Mulbregt}, Paul},
+ year = {2020},
+ month = mar,
+ journal = {Nature Methods},
+ volume = {17},
+ number = {3},
+ pages = {261--272},
+ publisher = {Nature Publishing Group},
+ issn = {1548-7105},
+ doi = {10.1038/s41592-019-0686-2},
+ urldate = {2024-05-05},
+ abstract = {SciPy is an open-source scientific computing library for the Python programming language. Since its initial release in 2001, SciPy has become a de facto standard for leveraging scientific algorithms in Python, with over 600 unique code contributors, thousands of dependent packages, over 100,000 dependent repositories and millions of downloads per year. In this work, we provide an overview of the capabilities and development practices of SciPy 1.0 and highlight some recent technical developments.},
+ copyright = {2020 The Author(s)},
+ langid = {english},
+ keywords = {Biophysical chemistry,Computational biology and bioinformatics,Technology},
+}
+@article{hebartRevealingMultidimensionalMental2020,
+ title = {Revealing the multidimensional mental representations of natural objects underlying human similarity judgements},
+ volume = {4},
+ copyright = {2020 This is a U.S. government work and not under copyright protection in the U.S.; foreign copyright protection may apply},
+ abstract = {Objects can be characterized according to a vast number of possible criteria (such as animacy, shape, colour and function), but some dimensions are more useful than others for making sense of the objects around us. To identify these core dimensions of object representations, we developed a data-driven computational model of similarity judgements for real-world images of 1,854 objects. The model captured most explainable variance in similarity judgements and produced 49 highly reproducible and meaningful object dimensions that reflect various conceptual and perceptual properties of those objects. These dimensions predicted external categorization behaviour and reflected typicality judgements of those categories. Furthermore, humans can accurately rate objects along these dimensions, highlighting their interpretability and opening up a way to generate similarity estimates from object dimensions alone. Collectively, these results demonstrate that human similarity judgements can be captured by a fairly low-dimensional, interpretable embedding that generalizes to external behaviour. Hebart et al. developed a computational model of similarity judgements for 1,854 natural objects. The model accurately predicted similarity and revealed 49 interpretable dimensions that reflect both perceptual and conceptual object properties.},
+ language = {en},
+ number = {11},
+ journal = {Nature Human Behaviour},
+ author = {Hebart, Martin N. and Zheng, Charles Y. and Pereira, Francisco and Baker, Chris I.},
+ year = {2020},
+ pages = {1173--1185},
+ doi = {10.1038/s41562-020-00951-3},
+}
+
+@inproceedings{tamuz_adaptively_2011,
+ title = {Adaptively learning the crowd kernel},
+ abstract = {We introduce an algorithm that, given $n$ objects, learns a similarity matrix over all $n^2$ pairs, from crowdsourced data alone. The algorithm samples responses to adaptively chosen triplet-based relative-similarity queries. Each query has the form "is object a more similar to b or to c?" and is chosen to be maximally informative given the preceding responses. The output is an embedding of the objects into Euclidean space (like MDS); we refer to this as the "crowd kernel." SVMs reveal that the crowd kernel captures prominent and subtle features across a number of domains, such as "is striped" among neckties and "vowel vs. consonant" among letters.},
+ booktitle = {Proceedings of the 28th {International} {Conference} on {Machine} {Learning} (ICML)},
+ author = {Tamuz, Omer and Liu, Ce and Belongie, Serge and Shamir, Ohad and Kalai, Adam Tauman},
+ year = {2011},
+}
+
+@article{roads_obtaining_2019,
+ title = {Obtaining psychological embeddings through joint kernel and metric learning},
+ volume = {51},
+ abstract = {Psychological embeddings provide a powerful formalism for characterizing human-perceived similarity among members of a stimulus set. Obtaining high-quality embeddings can be costly due to algorithm design, software deployment, and participant compensation. This work aims to advance state-of-the-art embedding techniques and provide a comprehensive software package that makes obtaining high-quality psychological embeddings both easy and relatively efficient. Contributions are made on four fronts. First, the embedding procedure allows multiple trial configurations (e.g., triplets) to be used for collecting similarity judgments from participants. For example, trials can be configured to collect triplet comparisons or to sort items into groups. Second, a likelihood model is provided for three classes of similarity kernels allowing users to easily infer the parameters of their preferred model using gradient descent. Third, an active selection algorithm is provided that makes data collection more efficient by proposing comparisons that provide the strongest constraints on the embedding. Fourth, the likelihood model allows the specification of group-specific attention weight parameters. A series of experiments are included to highlight each of these contributions and their impact on converging to a high-quality embedding. Collectively, these incremental improvements provide a powerful and complete set of tools for inferring psychological embeddings. The relevant tools are available as the Python package PsiZ, which can be cloned from GitHub (https://github.com/roads/psiz).},
+ language = {en},
+ number = {5},
+ journal = {Behavior Research Methods},
+ author = {Roads, Brett D. and Mozer, Michael C.},
+ year = {2019},
+ pages = {2180--2193},
+ doi = {10.3758/s13428-019-01285-3},
+}
+
+@inproceedings{haghiri_comparison-based_2018,
+ title = {Comparison-{Based} {Random} {Forests}},
+ abstract = {Assume we are given a set of items from a general metric space, but we neither have access to the representation of the data nor to the distances between data points. Instead, suppose that we can actively choose a triplet of items (A, B, C) and ask an oracle whether item A is closer to item B or to item C. In this paper, we propose a novel random forest algorithm for regression and classification that relies only on such triplet comparisons. In the theory part of this paper, we establish sufficient conditions for the consistency of such a forest. In a set of comprehensive experiments, we then demonstrate that the proposed random forest is efficient both for classification and regression. In particular, it is even competitive with other methods that have direct access to the metric representation of the data.},
+ language = {en},
+ booktitle = {Proceedings of the 35th {International} {Conference} on {Machine} {Learning} (ICML)},
+ author = {Haghiri, Siavash and Garreau, Damien and von Luxburg, Ulrike},
+ year = {2018},
+}
+
+@article{demiralp_learning_2014,
+ title = {Learning {Perceptual} {Kernels} for {Visualization} {Design}},
+ volume = {20},
+ abstract = {Visualization design can benefit from careful consideration of perception, as different assignments of visual encoding variables such as color, shape and size affect how viewers interpret data. In this work, we introduce perceptual kernels: distance matrices derived from aggregate perceptual judgments. Perceptual kernels represent perceptual differences between and within visual variables in a reusable form that is directly applicable to visualization evaluation and automated design. We report results from crowd-sourced experiments to estimate kernels for color, shape, size and combinations thereof. We analyze kernels estimated using five different judgment types--including Likert ratings among pairs, ordinal triplet comparisons, and manual spatial arrangement--and compare them to existing perceptual models. We derive recommendations for collecting perceptual similarities, and then demonstrate how the resulting kernels can be applied to automate visualization design decisions.},
+ language = {eng},
+ number = {12},
+ journal = {IEEE Transactions on Visualization and Computer Graphics},
+ author = {Demiralp, Çağatay and Bernstein, Michael S. and Heer, Jeffrey},
+ year = {2014},
+ pages = {1933--1942},
+ doi = {10.1109/TVCG.2014.2346978},
+}
+
+@article{toscani_three_2020,
+ title = {Three {Perceptual} {Dimensions} for {Specular} and {Diffuse} {Reflection}},
+ volume = {17},
+ abstract = {Previous research investigated the perceptual dimensionality of achromatic reflection of opaque surfaces, by using either simple analytic models of reflection or measured reflection properties of a limited sample of materials. Here, we aim to extend this work to a broader range of simulated materials. In a first experiment, we used sparse multidimensional scaling techniques to represent a set of rendered stimuli in a perceptual space that is consistent with participants’ similarity judgments. Participants were presented with one reference object and four comparisons, rendered with different material properties. They were asked to rank the comparisons according to their similarity to the reference, resulting in an efficient collection of a large number of similarity judgments. To interpret the space individuated by multidimensional scaling, we ran a second experiment in which observers were asked to rate our experimental stimuli according to a list of 30 adjectives referring to their surface reflectance properties. Our results suggest that perception of achromatic reflection is based on at least three dimensions, which we labelled “Lightness,” “Gloss,” and “Metallicity,” in accordance with the rating results. These dimensions are characterized by a relatively simple relationship with the parameters of the physically based rendering model used to generate our stimuli, indicating that they correspond to different physical properties of the rendered materials. Specifically, “Lightness” relates to diffuse reflections, “Gloss” to the presence of high contrast sharp specular highlights, and “Metallicity” to spread out specular reflections.},
+ number = {2},
+ journal = {ACM Transactions on Applied Perception},
+ author = {Toscani, Matteo and Guarnera, Dar’ya and Guarnera, Giuseppe Claudio and Hardeberg, Jon Yngve and Gegenfurtner, Karl R.},
+ year = {2020},
+ keywords = {BRDF, Perception, dimensionality},
+ pages = {6:1--6:26},
+ doi = {10.1145/3380741},
+}
+
+@article{pedregosa_scikit-learn_2011,
+ title = {Scikit-learn: {Machine} {Learning} in {Python}},
+ volume = {12},
+ shorttitle = {Scikit-learn},
+ abstract = {Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing machine learning to non-specialists using a general-purpose high-level language. Emphasis is put on ease of use, performance, documentation, and API consistency. It has minimal dependencies and is distributed under the simplified BSD license, encouraging its use in both academic and commercial settings. Source code, binaries, and documentation can be downloaded from http://scikit-learn.sourceforge.net.},
+ number = {85},
+ urldate = {2022-07-01},
+ journal = {Journal of Machine Learning Research (JMLR)},
+ author = {Pedregosa, Fabian and Varoquaux, Gaël and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, Édouard},
+ year = {2011},
+ pages = {2825--2830},
+}
+
+@article{buitinck_api_2013,
+ title={{API} design for machine learning software: experiences from the scikit-learn project},
+ author={Lars Buitinck and Gilles Louppe and Mathieu Blondel and Fabian Pedregosa and Andreas Mueller and Olivier Grisel and Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort and Jaques Grobler and Robert Layton and Jake Vanderplas and Arnaud Joly and Brian Holt and Gaël Varoquaux},
+ year={2013},
+ journal={arXiv:1309.0238 [cs.LG]},
+ doi = {10.48550/arXiv.1309.0238},
+}
+@inproceedings{ghoshdastidar_foundations_2019,
+ title = {Foundations of {Comparison}-{Based} {Hierarchical} {Clustering}},
+ abstract = {We address the classical problem of hierarchical clustering, but in a framework where one does not have access to a representation of the objects or their pairwise similarities. Instead, we assume that only a set of comparisons between objects is available, that is, statements of the form ``objects i and j are more similar than objects k and l.'' Such a scenario is commonly encountered in crowdsourcing applications. The focus of this work is to develop comparison-based hierarchical clustering algorithms that do not rely on the principles of ordinal embedding. We show that single and complete linkage are inherently comparison-based and we develop variants of average linkage. We provide statistical guarantees for the different methods under a planted hierarchical partition model. We also empirically demonstrate the performance of the proposed approaches on several datasets.},
+ booktitle = {Advances in {Neural} {Information} {Processing} {Systems} (NeurIPS)},
+ author = {Ghoshdastidar, Debarghya and Perrot, Michaël and von Luxburg, Ulrike},
+ year = {2019},
+}
+
+@article{harris_array_2020,
+ title = {Array programming with {NumPy}},
+ volume = {585},
+ copyright = {2020 The Author(s)},
+ abstract = {Array programming provides a powerful, compact and expressive syntax for accessing, manipulating and operating on data in vectors, matrices and higher-dimensional arrays. NumPy is the primary array programming library for the Python language. It has an essential role in research analysis pipelines in fields as diverse as physics, chemistry, astronomy, geoscience, biology, psychology, materials science, engineering, finance and economics. For example, in astronomy, NumPy was an important part of the software stack used in the discovery of gravitational waves1 and in the first imaging of a black hole2. Here we review how a few fundamental array concepts lead to a simple and powerful programming paradigm for organizing, exploring and analysing scientific data. NumPy is the foundation upon which the scientific Python ecosystem is constructed. It is so pervasive that several projects, targeting audiences with specialized needs, have developed their own NumPy-like interfaces and array objects. Owing to its central position in the ecosystem, NumPy increasingly acts as an interoperability layer between such array computation libraries and, together with its application programming interface (API), provides a flexible framework to support the next decade of scientific and industrial analysis.},
+ language = {en},
+ number = {7825},
+ journal = {Nature},
+ author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, Stéfan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. and Brett, Matthew and Haldane, Allan and del Río, Jaime Fernández and Wiebe, Mark and Peterson, Pearu and Gérard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.},
+ year = {2020},
+ keywords = {Computational neuroscience, Computational science, Computer science, Software, Solar physics},
+ pages = {357--362},
+ doi = {10.1038/s41586-020-2649-2},
+}
+
+@inproceedings{perrot_near-optimal_2020,
+ title = {Near-optimal comparison based clustering},
+ booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
+ author = {Perrot, Michaël and Esser, Pascal and Ghoshdastidar, Debarghya},
+ editor = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M.F. and Lin, H.},
+ year = {2020},
+}
+@inproceedings{heikinheimo2013crowd,
+ title={The crowd-median algorithm},
+ author={Heikinheimo, Hannes and Ukkonen, Antti},
+ booktitle={Proceedings of the AAAI Conference on Human Computation and Crowdsourcing},
+ volume={1},
+ pages={69--77},
+ year={2013},
+ doi = {10.1609/hcomp.v1i1.13079}
+}
+
+@InProceedings{amid2015,
+ title = {Multiview Triplet Embedding: Learning Attributes in Multiple Maps},
+ author = {Amid, Ehsan and Ukkonen, Antti},
+ booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
+ pages = {1472--1480},
+ year = {2015},
+ editor = {Bach, Francis and Blei, David},
+ volume = {37},
+ series = {Proceedings of Machine Learning Research},
+ address = {Lille, France},
+ month = {07--09 Jul},
+ publisher = {PMLR},
+ abstract = {For humans, it is usually easier to make statements about the similarity of objects in relative, rather than absolute terms. Moreover, subjective comparisons of objects can be based on a number of different and independent attributes. For example, objects can be compared based on their shape, color, etc. In this paper, we consider the problem of uncovering these hidden attributes given a set of relative distance judgments in the form of triplets. The attribute that was used to generate a particular triplet in this set is unknown. Such data occurs, e.g., in crowdsourcing applications where the triplets are collected from a large group of workers. We propose the Multiview Triplet Embedding (MVTE) algorithm that produces a number of low-dimensional maps, each corresponding to one of the hidden attributes. The method can be used to assess how many different attributes were used to create the triplets, as well as to assess the difficulty of a distance comparison task, and find objects that have multiple interpretations in relation to the other objects.}
+}
+@inproceedings{balcan2016learning,
+ title={Learning combinatorial functions from pairwise comparisons},
+ author={Balcan, Maria-Florina and Vitercik, Ellen and White, Colin},
+ booktitle={Conference on Learning Theory},
+ pages={310--335},
+ year={2016},
+ organization={PMLR}
+}
+@inproceedings{anderton2019scaling,
+ title={Scaling up ordinal embedding: A landmark approach},
+ author={Anderton, Jesse and Aslam, Javed},
+ booktitle={International Conference on Machine Learning},
+ pages={282--290},
+ year={2019},
+ organization={PMLR}
+}
+@inproceedings{bower2018landscape,
+ title={The landscape of non-convex quadratic feasibility},
+ author={Bower, Amanda and Jain, Lalit and Balzano, Laura},
+ booktitle={2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+ pages={3974--3978},
+ year={2018},
+ organization={IEEE},
+ doi={10.1109/icassp.2018.8461868}
+}
+@inproceedings{ghosh2019landmark,
+ title = {Landmark Ordinal Embedding},
+ booktitle = {Advances in Neural Information Processing Systems},
+ author = {Ghosh, Nikhil and Chen, Yuxin and Yue, Yisong},
+ editor = {Wallach, H. and Larochelle, H. and Beygelzimer, A. and {d'Alch{\'e}-Buc}, F. and Fox, E. and Garnett, R.},
+ year = {2019},
+ volume = {32},
+ publisher = {{Curran Associates, Inc.}}
+}
+
+
+% active OE:
+@article{sievert2023efficiently,
+ title={Efficiently Learning Relative Similarity Embeddings with Crowdsourcing},
+ author={Sievert, Scott and Nowak, Robert and Rogers, Timothy},
+ journal={Journal of Open Source Software},
+ volume={8},
+ number={84},
+ pages={4517},
+ year={2023},
+ doi = {10.21105/joss.04517}
+}
+@inproceedings{jamieson2015next,
+ title = {{{NEXT}}: {{A}} System for Real-World Development, Evaluation, and Application of Active Learning},
+ booktitle = {Advances in Neural Information Processing Systems},
+ author = {Jamieson, Kevin G and Jain, Lalit and Fernandez, Chris and Glattard, Nicholas J. and Nowak, Rob},
+ editor = {Cortes, C. and Lawrence, N. and Lee, D. and Sugiyama, M. and Garnett, R.},
+ year = {2015},
+ volume = {28},
+ publisher = {{Curran Associates, Inc.}}
+}
+
+@article{heim2015active,
+ title={Active perceptual similarity modeling with auxiliary information},
+ author={Heim, Eric and Berger, Matthew and Seversky, Lee and Hauskrecht, Milos},
+ journal={arXiv preprint arXiv:1511.02254},
+ year={2015},
+ doi = {10.48550/arXiv.1511.02254}
+}
+
+% used software:
+@article{zhao2023perceiving,
+ title = {Perceiving Style at Different Levels of Information},
+ author = {Zhao, Yuguang and {de Ridder}, Huib and Stumpel, Jeroen and Wijntjes, Maarten},
+ year = {2023},
+ month = aug,
+ journal = {Journal of Vision},
+ volume = {23},
+ number = {9},
+ pages = {5388},
+ issn = {1534-7362},
+ doi = {10.1167/jov.23.9.5388},
+ urldate = {2023-09-22},
+ abstract = {If two painters paint the same scene, the appearance difference can be referred to as style difference. The distinguishing features result from artists' use of composition, color, brushstroke etc. We are interested in how people perceive different depiction styles, when they are presented with different levels of information. Whole paintings contain mid-level information (depicted scenes, etc.) and low-level information (brushstroke, colors, etc.). Square cut-outs of single objects contain only low-level information. The same cut-outs in grayscale contain low-level information but without colors. We collected 42 digitized oil paintings as stimuli, the creation years varied from 15th to 21st century, and their location of production varied from southern Spain to the northern Netherlands. All paintings contain at least one apple. We gathered similarity judgement data using a triplet comparison method from three online experiments, where observers were presented the whole paintings (condition 1), square cut-outs of painted apples (condition 2) and the same cut-outs in grayscale (condition 3). 20 observers completed each experiment (60 observers in total). We applied soft ordinal embedding to achieve multidimensional embeddings. We reached a 3D space for condition 1 and 3, and a 4D space for condition 2. Condition 2 has less information than condition 1, but has one more dimension, suggesting that different criteria might be involved. Condition 3 has one less dimension than condition 2, suggesting that color is one of the attributes for style perception judgement. In addition, having the same dimensionality, around 64\% of the raw data was in line with the 3D embedding in condition 1 and 58\% in condition 3. This difference suggests that although the whole scene and a grayscale cut-out both need three dimensions to describe their style differences, the implicit style criteria for grayscale cut-outs are apparently more ambiguous than those used to judge the whole paintings.},
+}
+
+@misc{mandal2023revenue,
+ title = {A {{Revenue Function}} for {{Comparison-Based Hierarchical Clustering}}},
+ author = {Mandal, Aishik and Perrot, Micha{\"e}l and Ghoshdastidar, Debarghya},
+ year = {2023},
+ month = apr,
+ number = {arXiv:2211.16459},
+ eprint = {2211.16459},
+ primaryclass = {cs, stat},
+ publisher = {{arXiv}},
+ doi = {10.48550/arXiv.2211.16459},
+ urldate = {2023-09-22},
+ abstract = {Comparison-based learning addresses the problem of learning when, instead of explicit features or pairwise similarities, one only has access to comparisons of the form: \emph{Object $A$ is more similar to $B$ than to $C$.} Recently, it has been shown that, in Hierarchical Clustering, single and complete linkage can be directly implemented using only such comparisons while several algorithms have been proposed to emulate the behaviour of average linkage. Hence, finding hierarchies (or dendrograms) using only comparisons is a well understood problem. However, evaluating their meaningfulness when no ground-truth nor explicit similarities are available remains an open question. In this paper, we bridge this gap by proposing a new revenue function that allows one to measure the goodness of dendrograms using only comparisons. We show that this function is closely related to Dasgupta's cost for hierarchical clustering that uses pairwise similarities. On the theoretical side, we use the proposed revenue function to resolve the open problem of whether one can approximately recover a latent hierarchy using few triplet comparisons. On the practical side, we present principled algorithms for comparison-based hierarchical clustering based on the maximisation of the revenue and we empirically compare them with existing methods.},
+ archiveprefix = {arxiv},
+ keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}
+}
diff --git a/paper/supplementary.md b/paper/supplementary.md
new file mode 100644
index 0000000..6049bd0
--- /dev/null
+++ b/paper/supplementary.md
@@ -0,0 +1,91 @@
+---
+title: |
+ | Supplementary Material
+ | `cblearn`: Comparison-based Machine Learning in Python
+author:
+ - |
+ | David-Elias Künstle and Ulrike von Luxburg
+ | University of Tübingen and Tübingen AI Center, Germany
+date: 22 September 2023
+---
+
+# Empirical evaluation
+
+As a small empirical evaluation of our ordinal embedding implementations, we generated embeddings for several comparison-based datasets and measured runtime and triplet error.
+We compared various CPU and GPU implementations in `cblearn` with third-party implementations in *R* [`loe`, @terada_local_2014] and *MATLAB* [@van_der_maaten_stochastic_2012].
+In contrast to synthetic benchmarks [e.g., @vankadara_insights_2020], we used real-world datasets
+that can be accessed and converted to triplets through `cblearn`. The embedding dimension was arbitrarily fixed at two.
+Each algorithm ran once per dataset on a compute node (8 CPU cores; 96 GB RAM; NVIDIA RTX 2080ti) with a runtime limit of 24 hours. Some runs exceeded these resource limits: our FORTE implementation failed with an "out of memory" error on the `imagenet-v2` dataset; the *MATLAB* implementation of tSTE timed out on the `things` and `imagenet-v2` datasets; and the *R* implementation of SOE failed on `imagenet-v2` with an "unsupported long vector" error caused by the large size of the requested embedding.
+
+The benchmarking scripts and results are publicly available[^1]; a simplified sketch of a single benchmark run is shown below.
+
+[^1]: [https://github.com/cblearn/cblearn-benchmark](https://github.com/cblearn/cblearn-benchmark)
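+
+The following sketch illustrates one such run, with the car dataset and `SOE` standing in for the full dataset and estimator grid; the published scripts additionally handle the GPU variants and enforce the resource limits. We assume here, as `cross_val_score` in the concluding code example does implicitly, that an estimator's `score` method returns the triplet accuracy.
+
+```Python
+import time
+
+from cblearn import datasets, preprocessing, embedding
+
+cars = datasets.fetch_car_similarity()
+triplets = preprocessing.triplets_from_mostcentral(cars.triplet, cars.response)
+
+estimator = embedding.SOE(n_components=2)
+start = time.perf_counter()
+estimator.fit(triplets)
+runtime = time.perf_counter() - start
+# The triplet error is the complement of the fraction of triplets
+# that the fitted embedding represents correctly.
+triplet_error = 1 - estimator.score(triplets)
+print(f"SOE: {runtime:.1f} s, triplet error {triplet_error:.3f}")
+```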
+
+## Is there a "best" estimator?
+
+
+Comparing the ordinal embedding estimators in `cblearn`, we found that SOE, CKL, GNMDS, and tSTE performed about equally well in both runtime and accuracy (\autoref{fig:performance-per-algorithm_cblearn}).
+The GPU implementations were slower on the tested datasets and, for SOE and GNMDS, noticeably less accurate.
+
+{width=45%}
+{width=45%}
+\begin{figure}
+\caption{The triplet error and runtime per estimator and dataset, relative to the mean error and the fastest run, respectively. Thin lines show runs on the different datasets; thick lines indicate the respective median. Except for STE, all CPU algorithms embed the triplets similarly well, and their runtimes differ only slightly. The GPU implementations are usually considerably slower on the datasets used.
+}
+ \label{fig:performance-per-algorithm_cblearn}
+\end{figure}
+
+## When should GPU implementations be preferred?
+
+Regarding accuracy and runtime, our GPU implementations using the `torch` backend could not outperform their CPU counterparts using the `scipy` backend on the tested datasets. However, \autoref{fig:performance-per-algorithm_cblearn} shows that the GPU runtime grows more slowly with the number of triplets, so the GPU implementations may overtake the CPU implementations on large datasets of $10^7$ triplets or more. In some cases, the `torch` implementations also achieve the best accuracy.
+
+{width=50%}
+
+There are several possible explanations for the speed disadvantage of our `torch` implementations: the overhead of converting between `numpy` and `torch` and of computing gradients automatically (AutoGrad), but also the choice of the optimizer and its hyperparameters.
+To get a first impression of these factors, we built minimal examples of the CKL algorithm [@tamuz_adaptively_2011] and estimated 2D embeddings of the Vogue cover dataset [@heikinheimo2013crowd]; a sketch of such a minimal example follows below. \autoref{fig:torch-speedtest} shows runtimes and triplet accuracies on a standard laptop. The small markers show runs with different initializations; the bold markers show the respective median performance. The CKL implementation of `cblearn` is slightly slower than the minimal version, probably due to data validation and conversion overheads. If the gradient is not provided explicitly but computed with PyTorch's AutoGrad, the minimal example runs multiple times slower. The most severe slowdown, however, came from switching to a stochastic optimizer (*Adam*, lr=10). Following the results of the previous sections, we expect this overhead to be compensated as the dataset size increases.
+
+{width=50%}
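+
+The following minimal sketch conveys the flavor of these toy experiments: a CKL-style loss written with `torch` tensors and optimized with *Adam*, so the gradient comes from AutoGrad rather than a hand-derived formula. The function `fit_ckl` and its defaults are hypothetical stand-ins, not `cblearn`'s implementation; triplets are assumed to be integer rows `(anchor, near, far)`.
+
+```Python
+import torch
+
+def fit_ckl(triplets, n_objects, n_components=2, mu=0.1, lr=10, epochs=200):
+    triplets = torch.as_tensor(triplets, dtype=torch.long)
+    x = torch.randn(n_objects, n_components, requires_grad=True)
+    optimizer = torch.optim.Adam([x], lr=lr)
+    for _ in range(epochs):
+        optimizer.zero_grad()
+        anchor, near, far = (x[triplets[:, 0]], x[triplets[:, 1]],
+                             x[triplets[:, 2]])
+        d_near = ((anchor - near) ** 2).sum(dim=1)
+        d_far = ((anchor - far) ** 2).sum(dim=1)
+        # CKL models the probability of a correct triplet response
+        # as a ratio of regularized squared distances.
+        prob = (d_far + mu) / (d_near + d_far + 2 * mu)
+        loss = -torch.log(prob).sum()
+        loss.backward()  # AutoGrad replaces the hand-derived gradient
+        optimizer.step()
+    return x.detach().numpy()
+```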
+
+Another challenge of stochastic optimizers like *Adam* [@kingma2014adam] is their sensitivity to hyperparameter choices. \autoref{fig:adam_lr} demonstrates this sensitivity by varying Adam's learning rate in the toy example; a corresponding sweep is sketched below the figure. Tuning the optimizer parameters could likewise improve the performance of the `torch` ordinal embedding implementations.
+
+
+{width=50%}
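+
+A learning-rate sweep in the style of this figure can be sketched with the hypothetical `fit_ckl` from above, assuming `triplets` is an integer `(anchor, near, far)` array:
+
+```Python
+import numpy as np
+
+n_objects = int(triplets.max()) + 1
+for lr in (0.1, 1.0, 10.0, 100.0):
+    x = fit_ckl(triplets, n_objects, lr=lr)
+    # Fraction of triplets whose distances agree with the responses.
+    d_near = ((x[triplets[:, 0]] - x[triplets[:, 1]]) ** 2).sum(axis=1)
+    d_far = ((x[triplets[:, 0]] - x[triplets[:, 2]]) ** 2).sum(axis=1)
+    print(f"lr={lr}: triplet accuracy {(d_near < d_far).mean():.2f}")
+```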
+
+Beyond runtime and accuracy, the `torch` backend offers benefits for maintaining and extending the library: with PyTorch's automatic differentiation [@paszke2019pytorch], the loss gradient does not have to be derived and implemented by hand, so new algorithms can be added quickly, as the loss swap sketched below illustrates.
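+
+For instance, the CKL sketch above can be turned into an SOE-style method by swapping a single line; AutoGrad then supplies the new gradient automatically. Again, this is a sketch, not `cblearn`'s implementation:
+
+```Python
+# Inside the training loop of the `fit_ckl` sketch, replace the CKL
+# likelihood with a squared hinge loss on the distance margin:
+loss = (torch.clamp(d_near - d_far + 0.1, min=0) ** 2).sum()  # margin 0.1
+```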
+
+## How does `cblearn` compare to other implementations?
+
+In a small comparison, our implementations ran multiple times faster than the reference implementations, at approximately the same accuracy (\autoref{fig:performance-per-algorithm_library}).
+We compared our CPU implementation of SOE with the corresponding reference implementation in *R*, `loe` [@terada_local_2014], and our implementations of CKL, GNMDS, STE, and tSTE with the *MATLAB* implementations of @van_der_maaten_stochastic_2012.
+This comparison is not exhaustive, but it shows that our implementations are competitive with the reference implementations in both accuracy and runtime. Of course, we cannot disentangle the effects of the algorithm implementations from those of the runtime environments.
+
+{width=45%}
+{width=45%}
+\begin{figure}[!ht]
+ \caption{The triplet error and runtime per estimator, relative to the mean error and the fastest run on each dataset. Thin lines show runs on the different datasets; thick lines indicate the respective median. The triplet error is approximately the same for all implementations except STE. For every algorithm, `cblearn` provides the fastest implementation.}
+ \label{fig:performance-per-algorithm_library}
+\end{figure}
+
+
+# Code example\label{sec:code-example}
+
+```Python
+from cblearn import datasets, preprocessing, embedding
+from sklearn.model_selection import cross_val_score
+import seaborn as sns; sns.set_theme("poster", "whitegrid")
+
+# Download the car similarity dataset and convert the
+# "most central" judgments to triplet comparisons.
+cars = datasets.fetch_car_similarity()
+triplets = preprocessing.triplets_from_mostcentral(cars.triplet, cars.response)
+# Cross-validate the triplet accuracy of a 2D SOE embedding,
+# then fit it on all triplets and plot the result per car class.
+accuracy = cross_val_score(embedding.SOE(n_components=2), triplets, cv=5).mean()
+points = embedding.SOE(n_components=2).fit_transform(triplets)
+fg = sns.relplot(x=points[:, 0], y=points[:, 1],
+                 hue=cars.class_name[cars.class_id])
+fg.set(title=f"accuracy={accuracy:.2f}", xticklabels=[], yticklabels=[])
+fg.tight_layout(); fg.savefig("images/car_example.pdf")
+```
+![](images/car_example.pdf){width=75%}
+
+# References
diff --git a/paper/supplementary.pdf b/paper/supplementary.pdf
new file mode 100644
index 0000000..d1f98d5
Binary files /dev/null and b/paper/supplementary.pdf differ
diff --git a/setup.cfg b/setup.cfg
index 2a4095c..165faee 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -32,10 +32,10 @@ zip_safe = False
include_package_data = True
install_requires =
numpy~=1.26 # numba 0.57 requires numpy <1.25
- scipy~=1.11
- scikit-learn~=1.3
+ scipy~=1.13
+ scikit-learn~=1.5
sparse~=0.15
- numba>=0.58 # force numba version supporting py3.11
+ numba>=0.59 # numba 0.59 is the first release supporting py3.12
[options.package_data]
@@ -58,9 +58,9 @@ tests =
pandas~=2.1
matplotlib~=3.8
docs =
- sphinx~=7.2
- sphinx_rtd_theme~=2.0
- sphinx-gallery~=0.15
+ sphinx~=7.3
+ pydata-sphinx-theme~=0.15
+ sphinx-gallery~=0.16
matplotlib~=3.8
[build_sphinx]