From a51c68b7fc5091e38a767a76ea455dbb421c4b96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David-Elias=20K=C3=BCnstle?=
Date: Tue, 8 Aug 2023 22:50:29 +0200
Subject: [PATCH] Fix docs (#62)

* Remove warning
* Update python version for .readthedocs.yml
* Minor fixes in documentation
* Update file paths in imagenet dataset
* Update _food_similarity.py: Do not verify ssl
* Update CHANGELOG.md v0.1.1
---
 .readthedocs.yml | 7 ++++--
 CHANGELOG.md | 7 +++++-
 cblearn/datasets/_food_similarity.py | 13 ++++++++++-
 cblearn/datasets/_imagenet_similarity.py | 6 ++---
 docs/conf.py | 8 +++----
 docs/contributor_guide/index.rst | 28 ++++++++++++------------
 docs/index.rst | 3 ---
 docs/install.rst | 14 ++++++------
 docs/user_guide/index.rst | 9 +++++---
 9 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 13e9c70..dc32650 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -9,9 +9,12 @@ version: 2
sphinx:
  configuration: docs/conf.py

-# Optionally set the version of Python and requirements required to build your docs
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.10"
+
python:
-  version: 3.8
  install:
    - method: pip
      path: .
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf6b61e..bce0417 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,12 @@
# Changelog

+## 0.1.1
+
+- Minor fixes in the documentation.
+- Adapt loading of the food and imagenet datasets to solve problems caused by changes in the externally hosted files.
+
## 0.1.0

- Support python 3.9 and 3.10.
- Introduce semantic versioning
-- Publish to PyPI
\ No newline at end of file
+- Publish to PyPI
diff --git a/cblearn/datasets/_food_similarity.py b/cblearn/datasets/_food_similarity.py
index a871c1d..e9d6628 100644
--- a/cblearn/datasets/_food_similarity.py
+++ b/cblearn/datasets/_food_similarity.py
@@ -4,6 +4,7 @@
import os
from typing import Optional, Union
import zipfile
+import ssl

import numpy as np
from sklearn.datasets import _base
@@ -23,6 +24,10 @@ def fetch_food_similarity(data_home: Optional[os.PathLike] = None, download_if_m
                          return_triplets: bool = False) -> Union[Bunch, np.ndarray]:
    """ Load the Food-100 food similarity dataset (triplets).

+    .. warning::
+        This function downloads the file without SSL verification to work around an outdated certificate of the dataset hosts.
+        However, after downloading, the function verifies the file checksum before loading it, to minimize the risk of man-in-the-middle attacks.
+ =================== ===================== Triplets 190376 Objects 100 @@ -72,7 +77,13 @@ def fetch_food_similarity(data_home: Optional[os.PathLike] = None, download_if_m logger.info('Downloading food similarity from {} to {}'.format(ARCHIVE.url, data_home)) - archive_path = _base._fetch_remote(ARCHIVE, dirname=data_home) + try: + ssl_default = ssl._create_default_https_context + ssl._create_default_https_context = ssl._create_unverified_context + archive_path = _base._fetch_remote(ARCHIVE, dirname=data_home) + finally: + ssl._create_default_https_context = ssl_default + with zipfile.ZipFile(archive_path) as zf: with zf.open('food100-dataset/all-triplets.csv', 'r') as f: triplets = np.loadtxt(f, dtype=str, delimiter=';') diff --git a/cblearn/datasets/_imagenet_similarity.py b/cblearn/datasets/_imagenet_similarity.py index 0ffccec..169a5fb 100644 --- a/cblearn/datasets/_imagenet_similarity.py +++ b/cblearn/datasets/_imagenet_similarity.py @@ -112,13 +112,13 @@ def fetch_imagenet_similarity(data_home: Optional[os.PathLike] = None, download_ with zipfile.ZipFile(archive_path) as zf: import h5py - with zf.open('val/data/psiz0.4.1/obs-118.hdf5', 'r') as f: + with zf.open('data/deprecated/psiz0.4.1/obs-118.hdf5', 'r') as f: data_v1 = {k: np.asarray(v[()]) for k, v in h5py.File(f, mode='r').items()} - with zf.open('val/data/psiz0.4.1/obs-195.hdf5', 'r') as f: + with zf.open('data/deprecated/psiz0.4.1/obs-195.hdf5', 'r') as f: data_v2 = {k: np.asarray(v[()]) for k, v in h5py.File(f, mode='r').items()} - with zf.open('val/data/psiz0.4.1/catalog.hdf5', 'r') as f: + with zf.open('data/deprecated/psiz0.4.1/catalog.hdf5', 'r') as f: catalog = {k: np.asarray(v[()]) for k, v in h5py.File(f, mode='r').items()} joblib.dump((data_v1, data_v2, catalog), filepath, compress=6) diff --git a/docs/conf.py b/docs/conf.py index 2329c4e..5618309 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,8 +18,8 @@ # -- Project information ----------------------------------------------------- project = 'cblearn' -author = 'David-Elias Künstle, Leena Suresh, Siyavash Haghiri, Michael Perrot, Debarghya Ghoshdastidari, Ulrike von Luxburg' -copyright = f'2021, {author}' +author = 'David-Elias Künstle & Ulrike von Luxburg' +copyright = f'2023, {author}' # -- General configuration --------------------------------------------------- @@ -71,9 +71,7 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -rst_prolog = """.. attention:: - cblearn is work in progress. The API is still changing and a errors are known. Please help us by posting an issue on Github. -""" +rst_prolog = """""" # -- Options for HTML output ------------------------------------------------- diff --git a/docs/contributor_guide/index.rst b/docs/contributor_guide/index.rst index 7e0a1f1..e3e233d 100644 --- a/docs/contributor_guide/index.rst +++ b/docs/contributor_guide/index.rst @@ -13,31 +13,31 @@ This guide describes how to contribute code or documentation. Getting Started --------------- -We assume, you downloaded and installed cblearn as described in :ref:`developer_install`. +We assume you downloaded and installed ``cblearn`` as described in :ref:`developer_install`. The project directory contains the code directory ``cblearn/`` and the documentation ``docs/``. -In addition, there are readme, license, and a multiple configuration files as well as an examples folder. +In addition, the folder contains a readme, license, multiple configuration files, and an examples folder. 
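+
+For orientation, here is a rough sketch of the top-level layout (an illustration only, not an exhaustive listing)::
+
+    cblearn/      # library code, one folder per module, each with its own tests/ folder
+    docs/         # this documentation (Sphinx sources)
+    examples/     # example scripts
+    README, LICENSE, and several configuration files
+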
-------------
Changing Code
-------------

The Python code is structured in :ref:`modules`. Each module contains
-a `tests` folder with unit-tests.
+a `tests` folder with unit tests.
There should be such a test for every method and function.
-Use ``pytest --cov`` to run these tests and to measure the coverage, no tests should fail.
+Use ``pytest --cov`` to run these tests and to measure the coverage; no tests should fail.
The coverage indicates the tested fraction of code and should be close to *100%*.
You can exclude some of the more time expensive tests by ``pytest -m "not (sklearn or download)``.

All Python code follows the `PEP8 Style Guide`_.
The style of all code can be checked, running ``flake8 .`` and should print no warnings.

-Every class, method, and function should also have a docstring, describing the functionality and parameters.
+Every class, method, and function should have a docstring describing the functionality and parameters.
Please follow the `Google Docstring Style`_.
The docstring will be added to the :ref:`api_ref` by adding the function name in ``docs/references/index.rst``.
Check the syntax of the docstring by running ``make html`` in the ``docs/`` folder.
-Types should not be added to the docstring, but in the code as `type hints`_.
+Types should not be added to the docstring but declared in the code as `type hints`_.
Typechecks can be performed using ``mypy cblearn``.

.. _PEP8 Style Guide: https://www.python.org/dev/peps/pep-0008/
@@ -49,7 +49,7 @@
Changing Documentation
----------------------

The documentation is contained in the `docs/` folder.
-It can be build by running ``make html``.
+It can be built by running ``make html``.
Open ``docs/_build/html/index.html`` in a browser to view the local build of the documentation.

The documentation is structured in multiple folders and written as `reStructuredText`_.
@@ -65,8 +65,8 @@
to run the whole testing workflow, which is used on Github, locally.
Install nektos' `act`_ and then run `act -P ubuntu-latest=nektos/act-environments-ubuntu:18.04-full`

-`act` is using docker images with preinstalled software to provide almost the same test environment as Github.
-If it is not yet so, you have to `install docker`_ and, optionally, make it accessible for nonroot user.
+`act` uses docker images with preinstalled software to provide almost the same test environment as Github.
+If you have not done so already, you have to `install docker`_ and, optionally, make it accessible for non-root users.

.. note:: The docker image requires about 18 GB disk space. The first start of act might take some time,
@@ -80,13 +80,13 @@
Publish Changes
------------------

-Most contributions will change files either in the code or in the documentation directory, as described in the
+Most contributions will change files in the code or the documentation directory, as described in the
sections below. Commit your changes to a separate *git* branch (do **not** commit to ``master``).
-After you finished changing push this branch to Github and open a pull request to the ``master`` branch there.
+When you are done, push this branch to Github and open a pull request to the ``master`` branch there.
Once the request is opened, automated tests are run. If these tests indicate a problem, you can fix
this problem on your branch and push again.
-Once the automated tests are successful, maintainers of cblearn will review the changes and provide feedback.
-Usually after some iterations, your changes will be merged to the ``master`` branch.
+Once the automated tests are successful, maintainers of ``cblearn`` will review the changes and provide feedback.
+Usually, after some iterations, your changes will be merged into the ``master`` branch.

.. Note:

@@ -97,6 +97,6 @@
Versions should be semantic and follow PIP440_:
The version indicates ``major.minor.fix``; breaking changes are just allowed with major version steps.
-A new version is indicated with a Github release tag, which trigger continuous deployment to PyPI via Github Actions.
+A Github release tag indicates a new version, which triggers continuous deployment to PyPI via Github Actions.

.. _PIP440: https://peps.python.org/pep-0440/
diff --git a/docs/index.rst b/docs/index.rst
index a3635f8..8458f0e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,9 +6,6 @@
Welcome to cblearn's documentation!
===================================

-.. warning::
-    cblearn is work in progress. The API can change and bugs appear. Please help us by posting an issue on Github.
-
.. toctree::
   :maxdepth: 2
   :caption: Contents:
diff --git a/docs/install.rst b/docs/install.rst
index d167f70..1066cd0 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -2,16 +2,16 @@
Installation
============

-cblearn requires Python 3.8 or newer.
+``cblearn`` requires Python 3.9 or newer.
We recommend using Anaconda_ to install Python and dependencies in separated environments.
-We support Linux (tested on Ubuntu 20.4), Windows and Mac OS.
+We mainly test on Linux (Ubuntu 20.04), but Windows and Mac OS should also work.
Examples in this installation guide Linux shell commands.

.. _Anaconda: https://docs.anaconda.com/anaconda/install/

```
-conda create -n cblearn python==3.8
+conda create -n cblearn python==3.9
conda activate cblearn
```

@@ -19,7 +19,7 @@
User Installation
-----------------

-cblearn and its dependencies can be installed using `pip`:
+``cblearn`` and its dependencies can be installed using `pip`:

.. code-block:: bash

@@ -36,8 +36,8 @@
for most uses.
However, some features require more packages that can be installed by adding an `option` to the install command..

-For example, to use estimators on GPU, based on pytorch, and estimators
-wrapping paper author's original implementation in R-lang:
+For example, to use estimators on GPU, based on ``pytorch``, and estimators
+wrapping the paper authors' original implementations in ``R``:

.. code-block:: bash

@@ -62,7 +62,7 @@ Contributor Installation

If you want to make changes to the code or documentation, you should
first download the repository and install the project in developer mode with developer dependencies.
-This way, changes in the code are directly considered without the need of re-installation.
+This way, changes in the code take effect immediately, without the need for re-installation.

.. code-block:: bash

diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst
index b183f05..87ab13b 100644
--- a/docs/user_guide/index.rst
+++ b/docs/user_guide/index.rst
@@ -5,14 +5,14 @@ User Guide
Most Machine Learning algorithms use numerical training data (features) for inference,
either representing points in a Euclidean space, similarities, or distances.
The are settings, e.g. in human studies, when metric points are not available but only ordinal comparisons.
-Comparison-based Learning are Machine Learning algorithms, applicable in this setting.
+Comparison-based learning algorithms are machine learning algorithms designed for exactly this setting.

-------------------
Triplet comparisons
-------------------
Triplet comparisons are the most common form of ordinal comparisons. For the triplet of objects :math:`(i, j, k)`
-one can ask "Is the object i more similar to the object j or to the object k?".
+one can ask, "Is object i more similar to object j or to object k?".
For the unknown points :math:`(x_i, x_j, x_k)` and the distance metric :math:`\delta`, the question corresponds to the following
inequality:

.. math::
@@ -20,7 +20,10 @@ inequality:

    \delta(x_i, x_j) \le \delta(x_i, x_k).

-This library supports two representation formats of triplets, in an array or in an sparse matrix.
+This library supports two representation formats for triplets: an array format and a sparse matrix format.
+The array format uses a 2d ``numpy`` array with one triplet per row and columns for ``i, j, k``.
+Instead of encoding the response in the column order, an additional response array with entries 1 or -1 can specify whether each ``(i, j, k)`` is correct or wrong.
+In the sparse matrix format, the triplet indices address the matrix entries, which contain 1 or -1.
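+
+For illustration, here is a minimal sketch of both formats using plain ``numpy`` and hypothetical example data; it does not call a specific ``cblearn`` function, and the library's own data structures may differ in detail:
+
+.. code-block:: python
+
+    import numpy as np
+
+    # Array format: one triplet per row, columns are the object indices (i, j, k).
+    # Row [0, 1, 2] encodes "object 0 is more similar to object 1 than to object 2".
+    triplets = np.array([[0, 1, 2],
+                         [3, 0, 1],
+                         [2, 3, 0]])
+
+    # Optional response array: 1 confirms the stated order, -1 flips it.
+    responses = np.array([1, -1, 1])
+
+    # Sparse view in coordinate form: the triplet indices address the entries
+    # and the +/-1 responses are the stored values.
+    # (Sketch only; the actual sparse data structure in the library may differ.)
+    entries = {tuple(map(int, ijk)): int(r) for ijk, r in zip(triplets, responses)}
+    assert entries[(0, 1, 2)] == 1

-------------------------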