Skip to content

Commit

Permalink
Remove duplicate references (cblearn#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
dekuenstle authored Jun 12, 2024
1 parent 3ef9181 commit 05b3bc3
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 31 deletions.
11 changes: 11 additions & 0 deletions cblearn/datasets/_triplet_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@
from cblearn.datasets._datatypes import NoiseTarget, Distance


def _count_unique_items(query):
    """ Count the distinct items in each row of a 2D array.

    Each row is sorted so that equal items become neighbours; the number
    of positions where a neighbour differs, plus one for the first item,
    is the number of distinct items. This stays efficient for a large
    number of rows and integer items:
    https://stackoverflow.com/a/48473125

    Args:
        query: 2D array-like with one query per row.

    Returns:
        Integer array with one count per row. Rows without any items
        (an input with zero columns) are counted as 0.

    Note:
        Items are compared with ``!=``, so NaN entries are never merged
        and each one counts as a distinct item.
    """
    sorted_query = np.sort(query, axis=1)
    # Equal items are adjacent after sorting, so every change between
    # neighbours marks the start of a new distinct value.
    n_changes = (sorted_query[:, 1:] != sorted_query[:, :-1]).sum(axis=1)
    # The first item of a row is distinct by definition, but only if the
    # row has an item at all; adding 1 unconditionally would report one
    # unique item for empty rows.
    first_item = 1 if sorted_query.shape[1] > 0 else 0
    return n_changes + first_item


def _count_unique_items(query):
""" Count unique items per row in a 2D array.
Expand Down
2 changes: 1 addition & 1 deletion docs/getting_started/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,4 @@ once enough are available.
The triplet generator's `result_format` option specifies the expected data format of the triplets, as triplets can be represented in different ways.
This example uses the `list-order` format, a list of triplets, containing the indices of an anchor, near, and far point.
Learn more about data formats and other aspects of the library in the :ref:`user_guide`.
Alternatively, you can find more code in the :ref:`examples` or get an overview of the :ref:`api`.
Alternatively, you can find more code in the :ref:`examples` or get an overview of the :ref:`api_ref`.
67 changes: 37 additions & 30 deletions paper/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ @article{fsauerObjectiveMeasurementApproach2024
copyright = {All rights reserved},
langid = {english},
}

@article{huber2024tracing,
title={Tracing Truth Through Conceptual Scaling: Mapping People’s Understanding of Abstract Concepts},
author={Huber, Lukas S and K{\"u}nstle, David-Elias and Reuter, Kevin},
Expand All @@ -32,8 +33,17 @@ @article{huber2024tracing
doi={10.31234/osf.io/c42yr}
}


@article{Sievert2023, doi = {10.21105/joss.04517}, url = {https://doi.org/10.21105/joss.04517}, year = {2023}, publisher = {The Open Journal}, volume = {8}, number = {84}, pages = {4517}, author = {Scott Sievert and Robert Nowak and Timothy Rogers}, title = {Efficiently Learning Relative Similarity Embeddings with Crowdsourcing}, journal = {Journal of Open Source Software} }
@article{Sievert2023,
doi = {10.21105/joss.04517},
url = {https://doi.org/10.21105/joss.04517},
year = {2023}, publisher = {The Open Journal},
volume = {8},
number = {84},
pages = {4517},
author = {Scott Sievert and Robert Nowak and Timothy Rogers},
title = {Efficiently Learning Relative Similarity Embeddings with Crowdsourcing},
journal = {Journal of Open Source Software}
}

@inproceedings{NIPS2015_89ae0fe2,
author = {Jamieson, Kevin G and Jain, Lalit and Fernandez, Chris and Glattard, Nicholas J. and Nowak, Rob},
Expand All @@ -47,7 +57,6 @@ @inproceedings{NIPS2015_89ae0fe2
year = {2015}
}


@article{vankadara_insights_2020,
title = {Insights into {Ordinal} {Embedding} {Algorithms}: {A} {Systematic} {Evaluation}},
shorttitle = {Insights into {Ordinal} {Embedding} {Algorithms}},
Expand Down Expand Up @@ -139,9 +148,20 @@ @article{maloney_maximum_2003
}

@InProceedings{agarwal_generalized_2007,
title = {Generalized Non-metric Multidimensional Scaling},
author = {Agarwal, Sameer and Wills, Josh and Cayton, Lawrence and Lanckriet, Gert and Kriegman, David and Belongie, Serge}, booktitle = {Proceedings of the Eleventh International Conference on Artificial Intelligence and Statistics}, pages = {11--18}, year = {2007}, editor = {Meila, Marina and Shen, Xiaotong}, volume = {2}, series = {Proceedings of Machine Learning Research}, address = {San Juan, Puerto Rico}, month = {21--24 Mar}, publisher = {PMLR}, pdf = {http://proceedings.mlr.press/v2/agarwal07a/agarwal07a.pdf}, url = {https://proceedings.mlr.press/v2/agarwal07a.html}}

title = {Generalized Non-metric Multidimensional Scaling},
author = {Agarwal, Sameer and Wills, Josh and Cayton, Lawrence and Lanckriet, Gert and Kriegman, David and Belongie, Serge},
booktitle = {Proceedings of the Eleventh International Conference on Artificial Intelligence and Statistics},
pages = {11--18},
year = {2007},
editor = {Meila, Marina and Shen, Xiaotong},
volume = {2},
series = {Proceedings of Machine Learning Research},
address = {San Juan, Puerto Rico},
month = {21--24 Mar},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v2/agarwal07a/agarwal07a.pdf},
url = {https://proceedings.mlr.press/v2/agarwal07a.html}
}

@inproceedings{terada_local_2014,
title = {Local ordinal embedding},
Expand Down Expand Up @@ -226,21 +246,15 @@ @inproceedings{kingma2014adam
biburl = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{heikinheimo2013crowd,
title={The crowd-median algorithm},
author={Heikinheimo, Hannes and Ukkonen, Antti},
booktitle={Proceedings of the AAAI Conference on Human Computation and Crowdsourcing},
volume={1},
pages={69--77},
year={2013}
}

@article{paszke2019pytorch,
title={Pytorch: An imperative style, high-performance deep learning library},
author={Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others},
journal={Advances in neural information processing systems},
volume={32},
year={2019}
}

@inproceedings{anselPyTorchFasterMachine2024,
title = {{{PyTorch}} 2: {{Faster Machine Learning Through Dynamic Python Bytecode Transformation}} and {{Graph Compilation}}},
shorttitle = {{{PyTorch}} 2},
Expand All @@ -256,6 +270,7 @@ @inproceedings{anselPyTorchFasterMachine2024
isbn = {9798400703850},
langid = {english},
}

@article{virtanenSciPyFundamentalAlgorithms2020,
title = {{{SciPy}} 1.0: Fundamental Algorithms for Scientific Computing in {{Python}}},
shorttitle = {{{SciPy}} 1.0},
Expand All @@ -275,6 +290,7 @@ @article{virtanenSciPyFundamentalAlgorithms2020
langid = {english},
keywords = {Biophysical chemistry,Computational biology and bioinformatics,Technology},
}

@article{harris_array_2020,
title = {Array Programming with {{NumPy}}},
author = {Harris, Charles R. and Millman, K. Jarrod and {van der Walt}, St{\'e}fan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and {van Kerkwijk}, Marten H. and Brett, Matthew and Haldane, Allan and {del R{\'i}o}, Jaime Fern{\'a}ndez and Wiebe, Mark and Peterson, Pearu and {G{\'e}rard-Marchant}, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.},
Expand All @@ -289,6 +305,7 @@ @article{harris_array_2020
langid = {english},
keywords = {Computational neuroscience,Computational science,Computer science,Software,Solar physics}
}

@article{hebartRevealingMultidimensionalMental2020,
title = {Revealing the multidimensional mental representations of natural objects underlying human similarity judgements},
volume = {4},
Expand Down Expand Up @@ -379,6 +396,7 @@ @article{buitinck_api_2013
journal={arXiv:1309.0238 [cs.LG]},
doi = {10.48550/arXiv.1309.0238},
}

@inproceedings{ghoshdastidar_foundations_2019,
title = {Foundations of {Comparison}-{Based} {Hierarchical} {Clustering}},
abstract = {We address the classical problem of hierarchical clustering, but in a framework where one does not have access to a representation of the objects or their pairwise similarities. Instead, we assume that only a set of comparisons between objects is available, that is, statements of the form ``objects i and j are more similar than objects k and l.'' Such a scenario is commonly encountered in crowdsourcing applications. The focus of this work is to develop comparison-based hierarchical clustering algorithms that do not rely on the principles of ordinal embedding. We show that single and complete linkage are inherently comparison-based and we develop variants of average linkage. We provide statistical guarantees for the different methods under a planted hierarchical partition model. We also empirically demonstrate the performance of the proposed approaches on several datasets.},
Expand All @@ -387,28 +405,14 @@ @inproceedings{ghoshdastidar_foundations_2019
year = {2019},
}

@article{harris_array_2020,
title = {Array programming with {NumPy}},
volume = {585},
copyright = {2020 The Author(s)},
abstract = {Array programming provides a powerful, compact and expressive syntax for accessing, manipulating and operating on data in vectors, matrices and higher-dimensional arrays. NumPy is the primary array programming library for the Python language. It has an essential role in research analysis pipelines in fields as diverse as physics, chemistry, astronomy, geoscience, biology, psychology, materials science, engineering, finance and economics. For example, in astronomy, NumPy was an important part of the software stack used in the discovery of gravitational waves1 and in the first imaging of a black hole2. Here we review how a few fundamental array concepts lead to a simple and powerful programming paradigm for organizing, exploring and analysing scientific data. NumPy is the foundation upon which the scientific Python ecosystem is constructed. It is so pervasive that several projects, targeting audiences with specialized needs, have developed their own NumPy-like interfaces and array objects. Owing to its central position in the ecosystem, NumPy increasingly acts as an interoperability layer between such array computation libraries and, together with its application programming interface (API), provides a flexible framework to support the next decade of scientific and industrial analysis.},
language = {en},
number = {7825},
journal = {Nature},
author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, Stéfan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. and Brett, Matthew and Haldane, Allan and del Río, Jaime Fernández and Wiebe, Mark and Peterson, Pearu and Gérard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.},
year = {2020},
keywords = {Computational neuroscience, Computational science, Computer science, Software, Solar physics},
pages = {357--362},
doi = {10.1038/s41586-020-2649-2},
}

@inproceedings{perrot_near-optimal_2020,
title = {Near-optimal comparison based clustering},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
author = {Perrot, Michaël and Esser, Pascal and Ghoshdastidar, Debarghya},
editor = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M.F. and Lin, H.},
year = {2020},
}

@inproceedings{heikinheimo2013crowd,
title={The crowd-median algorithm},
author={Heikinheimo, Hannes and Ukkonen, Antti},
Expand All @@ -433,6 +437,7 @@ @InProceedings{amid2015
publisher = {PMLR},
abstract = {For humans, it is usually easier to make statements about the similarity of objects in relative, rather than absolute terms. Moreover, subjective comparisons of objects can be based on a number of different and independent attributes. For example, objects can be compared based on their shape, color, etc. In this paper, we consider the problem of uncovering these hidden attributes given a set of relative distance judgments in the form of triplets. The attribute that was used to generate a particular triplet in this set is unknown. Such data occurs, e.g., in crowdsourcing applications where the triplets are collected from a large group of workers. We propose the Multiview Triplet Embedding (MVTE) algorithm that produces a number of low-dimensional maps, each corresponding to one of the hidden attributes. The method can be used to assess how many different attributes were used to create the triplets, as well as to assess the difficulty of a distance comparison task, and find objects that have multiple interpretations in relation to the other objects.}
}

@inproceedings{balcan2016learning,
title={Learning combinatorial functions from pairwise comparisons},
author={Balcan, Maria-Florina and Vitercik, Ellen and White, Colin},
Expand All @@ -441,6 +446,7 @@ @inproceedings{balcan2016learning
year={2016},
organization={PMLR}
}

@inproceedings{anderton2019scaling,
title={Scaling up ordinal embedding: A landmark approach},
author={Anderton, Jesse and Aslam, Javed},
Expand All @@ -449,6 +455,7 @@ @inproceedings{anderton2019scaling
year={2019},
organization={PMLR}
}

@inproceedings{bower2018landscape,
title={The landscape of non-convex quadratic feasibility},
author={Bower, Amanda and Jain, Lalit and Balzano, Laura},
Expand All @@ -458,6 +465,7 @@ @inproceedings{bower2018landscape
organization={IEEE},
doi={10.1109/icassp.2018.8461868}
}

@inproceedings{ghosh2019landmark,
title = {Landmark Ordinal Embedding},
booktitle = {Advances in Neural Information Processing Systems},
Expand All @@ -468,7 +476,6 @@ @inproceedings{ghosh2019landmark
publisher = {{Curran Associates, Inc.}}
}


% active OE:
@article{sievert2023efficiently,
title={Efficiently Learning Relative Similarity Embeddings with Crowdsourcing},
Expand Down

0 comments on commit 05b3bc3

Please sign in to comment.