Skip to content

Commit

Permalink
Merge pull request #981 from AFM-SPM/SylviaWhittle/hidden-config-options
Browse files Browse the repository at this point in the history
Add support for partial config files
  • Loading branch information
SylviaWhittle authored Nov 8, 2024
2 parents 968124d + e3877ca commit eddd2ef
Show file tree
Hide file tree
Showing 10 changed files with 353 additions and 16 deletions.
29 changes: 29 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,35 @@ create-config --help` for further details).
topostats create-config
```

## Partial configurations

TopoStats supports using a partial configuration, where you specify only the fields you wish to override. This is
useful if you only want to change a few parameters from the default configuration or would like to use a configuration
file that is smaller and easier to read.

To create a partial configuration file, simply create a new config file and delete anything you don't want to override.

TopoStats will take the partial configuration file and merge it with the default configuration file, with the partial
configuration taking precedence. This means that any fields you specify in the partial configuration will override the
default configuration, while any fields you don't specify will be taken from the default configuration. Command-line
arguments will override both the default and partial configurations.

For example, you could use a configuration as simple as:

```yaml
base_dir: ./mydata/
output_dir: ./myoutput/
filter:
  remove_scars:
    run: true
grains:
  threshold_method: absolute
  threshold_absolute:
    above: 1.2
  absolute_area_threshold:
    above: [400, 1000]
```
## Using a custom configuration
If you have generated a configuration file, you can modify and edit it to change the parameters (see
Expand Down
45 changes: 45 additions & 0 deletions tests/resources/test_partial_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# For more information on configuration and how to use it:
# https://afm-spm.github.io/TopoStats/main/configuration.html
base_dir: ./ # Directory in which to search for data files
output_dir: ./output # Directory to output results to
log_level: info # Verbosity of output. Options: warning, error, info, debug
cores: 2 # Number of CPU cores to utilise for processing multiple files simultaneously.
file_ext: .spm # File extension of the data files.
loading:
channel: Height # Channel to pull data from in the data files.
filter:
run: true # Options : true, false
row_alignment_quantile: 0.5 # lower values may improve flattening of larger features
threshold_method: absolute # Options : otsu, std_dev, absolute
otsu_threshold_multiplier: 1.0
threshold_std_dev:
below: 10.0 # Threshold for data below the image background
above: 1.0 # Threshold for data above the image background
threshold_absolute:
below: -1.0 # Threshold for data below the image background
above: 1.0 # Threshold for data above the image background
gaussian_size: 1.0121397464510862 # Gaussian blur intensity in px
gaussian_mode: nearest
grains:
run: true # Options : true, false
# Thresholding by height
threshold_method: std_dev # Options : std_dev, otsu, absolute, unet
otsu_threshold_multiplier: 1.0
threshold_std_dev:
below: 10.0 # Threshold for grains below the image background
above: 1.0 # Threshold for grains above the image background
threshold_absolute:
below: -1.0 # Threshold for grains below the image background
above: 1.0 # Threshold for grains above the image background
direction: above # Options: above, below, both (defines whether to look for grains above or below thresholds or both)
# Thresholding by area
smallest_grain_size_nm2: 50 # Size in nm^2 of tiny grains/blobs (noise) to remove, must be > 0.0
absolute_area_threshold:
above: [300, 3000] # above surface [Low, High] in nm^2 (also takes null)
below: [null, null] # below surface [Low, High] in nm^2 (also takes null)
remove_edge_intersecting_grains: true # Whether or not to remove grains that touch the image border
unet_config:
model_path: null # Path to a trained U-Net model
grain_crop_padding: 2 # Padding to apply to the grain crop bounding box
upper_norm_bound: 5.0 # Upper bound for normalisation of input data. This should be slightly higher than the maximum desired / expected height of grains.
lower_norm_bound: -1.0 # Lower bound for normalisation of input data. This should be slightly lower than the minimum desired / expected height of the background.
15 changes: 15 additions & 0 deletions tests/test_entry_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,18 @@ def test_entry_point_create_config_file(tmp_path: Path) -> None:
]
)
assert Path(f"{tmp_path}/test_create_config.yaml").is_file()


def test_entry_point_create_simple_config_file(tmp_path: Path) -> None:
    """Check that ``create-config --simple`` writes the requested file to the output directory."""
    filename = "test_create_simple_config.yaml"
    # Mirror the command line: topostats create-config --filename <name> --output-dir <dir> --simple
    cli_args = ["create-config", "--filename", filename, "--output-dir", f"{tmp_path}", "--simple"]
    entry_point(manually_provided_args=cli_args)
    assert Path(f"{tmp_path}/{filename}").is_file()
32 changes: 32 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
hdf5_to_dict,
load_array,
load_pkl,
merge_mappings,
path_to_str,
read_64d,
read_char,
Expand Down Expand Up @@ -62,6 +63,36 @@
# pylint: disable=too-many-lines


@pytest.mark.parametrize(
    ("dict1", "dict2", "expected_merged_dict"),
    [
        pytest.param(
            {"a": 1, "b": 2},
            {"c": 3, "d": 4},
            {"a": 1, "b": 2, "c": 3, "d": 4},
            id="two dicts, no common keys",
        ),
        pytest.param(
            {"a": 1, "b": 2},
            {"b": 3, "c": 4},
            {"a": 1, "b": 3, "c": 4},
            id="two dicts, one common key, testing priority of second dict",
        ),
        # Nested dictionaries
        pytest.param(
            {"a": 1, "b": {"c": 2, "d": 3}},
            {"b": {"c": 4, "e": 5}},
            {"a": 1, "b": {"c": 4, "d": 3, "e": 5}},
            id="nested dictionaries, one common key in nested dict, testing priority of second dict",
        ),
    ],
)
def test_merge_mappings(dict1: dict, dict2: dict, expected_merged_dict: dict) -> None:
    """Check that ``merge_mappings`` combines two mappings, with values from the second taking priority."""
    assert merge_mappings(dict1, dict2) == expected_merged_dict


def test_get_date_time() -> None:
    """Check that ``get_date_time()`` returns a string that parses as ``%Y-%m-%d %H:%M:%S``."""
    timestamp = get_date_time()
    # strptime raises ValueError if the string does not match the expected format
    parsed = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
    assert parsed
Expand Down Expand Up @@ -97,6 +128,7 @@ def test_write_config_with_comments(tmp_path: Path, filename: str, config: str,
args.filename = filename
args.output_dir = tmp_path
args.config = config
args.simple = False

# Write default config with comments to file
write_config_with_comments(args)
Expand Down
70 changes: 70 additions & 0 deletions tests/test_run_topostats.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,86 @@
"""Test end-to-end running of topostats."""

import argparse
import logging
from pathlib import Path

import pytest

from topostats.entry_point import entry_point
from topostats.logs.logs import LOGGER_NAME
from topostats.run_topostats import reconcile_config_args
from topostats.validation import DEFAULT_CONFIG_SCHEMA, validate_config

BASE_DIR = Path.cwd()


def test_reconcile_config_args_no_config() -> None:
    """Check that reconciling arguments without a config file gives a configuration accepted by the schema."""
    cli_args = argparse.Namespace(program="process", config_file=None)
    reconciled = reconcile_config_args(args=cli_args)

    # The reconciled configuration must be accepted by the default schema
    validate_config(reconciled, schema=DEFAULT_CONFIG_SCHEMA, config_type="YAML configuration file")


def test_reconcile_config_args_no_config_with_overrides() -> None:
    """Check that a command-line override is applied when no config file is supplied."""
    override_dir = "./dummy_output_dir"
    cli_args = argparse.Namespace(program="process", config_file=None, output_dir=override_dir)
    reconciled = reconcile_config_args(args=cli_args)

    # The command-line value should appear in the reconciled configuration as a Path
    assert reconciled["output_dir"] == Path(override_dir)
    # The reconciled configuration must still be accepted by the default schema
    validate_config(reconciled, schema=DEFAULT_CONFIG_SCHEMA, config_type="YAML configuration file")


def test_reconcile_config_args_full_config() -> None:
    """Check that reconciling arguments with the full default config file gives a schema-valid configuration."""
    default_config_file = BASE_DIR / "topostats" / "default_config.yaml"
    cli_args = argparse.Namespace(program="process", config_file=f"{default_config_file}")

    reconciled = reconcile_config_args(args=cli_args)

    # The reconciled configuration must be accepted by the default schema
    validate_config(reconciled, schema=DEFAULT_CONFIG_SCHEMA, config_type="YAML configuration file")


def test_reconcile_config_args_partial_config() -> None:
    """Check that values from a partial config file override the defaults and the result stays schema-valid."""
    partial_config_file = BASE_DIR / "tests" / "resources" / "test_partial_config.yaml"
    cli_args = argparse.Namespace(program="process", config_file=f"{partial_config_file}")
    reconciled = reconcile_config_args(args=cli_args)

    # The partial config sets the filter threshold method to "absolute"; it must win over the default
    assert reconciled["filter"]["threshold_method"] == "absolute"
    # The reconciled configuration must still be accepted by the default schema
    validate_config(reconciled, schema=DEFAULT_CONFIG_SCHEMA, config_type="YAML configuration file")


def test_reconcile_config_args_partial_config_with_overrides() -> None:
    """Check that a partial config file and a command-line override are both applied to the configuration."""
    partial_config_file = BASE_DIR / "tests" / "resources" / "test_partial_config.yaml"
    override_dir = "./dummy_output_dir"
    cli_args = argparse.Namespace(
        program="process",
        config_file=f"{partial_config_file}",
        output_dir=override_dir,
    )
    reconciled = reconcile_config_args(args=cli_args)

    # The partial config sets the filter threshold method to "absolute"; it must win over the default
    assert reconciled["filter"]["threshold_method"] == "absolute"
    # The command-line value should appear in the reconciled configuration as a Path
    assert reconciled["output_dir"] == Path(override_dir)
    # The reconciled configuration must still be accepted by the default schema
    validate_config(reconciled, schema=DEFAULT_CONFIG_SCHEMA, config_type="YAML configuration file")


@pytest.mark.parametrize("option", [("-h"), ("--help")])
def test_run_topostats_main_help(capsys, option) -> None:
"""Test the -h/--help flag to run_topostats."""
Expand Down
3 changes: 0 additions & 3 deletions topostats/default_config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# Config file generated 2024-07-29 11:11:05
# # For more information on configuration and how to use it:
# https://afm-spm.github.io/TopoStats/main/configuration.html
base_dir: ./ # Directory in which to search for data files
output_dir: ./output # Directory to output results to
log_level: info # Verbosity of output. Options: warning, error, info, debug
Expand Down
7 changes: 7 additions & 0 deletions topostats/entry_point.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,13 @@ def create_parser() -> arg.ArgumentParser:
default=None,
help="Configuration to use, currently only one is supported, the 'default'.",
)
create_config_parser.add_argument(
"-s",
"--simple",
dest="simple",
action="store_true",
help="Create a simple configuration file with only the most common options.",
)
create_config_parser.set_defaults(func=write_config_with_comments)

create_matplotlibrc_parser = subparsers.add_parser(
Expand Down
42 changes: 40 additions & 2 deletions topostats/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
import pickle as pkl
import re
import struct
from collections.abc import MutableMapping
from datetime import datetime
from importlib import resources
from pathlib import Path
from typing import Any
from typing import Any, TypeVar

import h5py
import numpy as np
Expand All @@ -36,6 +37,39 @@
# pylint: disable=broad-except
# pylint: disable=too-many-lines

MutableMappingType = TypeVar("MutableMappingType", bound="MutableMapping")


def merge_mappings(map1: MutableMappingType, map2: MutableMappingType) -> MutableMappingType:
    """
    Merge two mappings (dictionaries), with priority given to the second mapping.

    Note: Using a Mapping should make this robust to any mapping type, not just dictionaries. MutableMapping was needed
    as Mapping is not a mutable type, and this function needs to be able to change the dictionaries.

    ``map1`` is updated in place and returned; ``map2`` is not modified. Nested mappings are merged recursively. If a
    key holds a mapping in ``map2`` but a non-mapping value (e.g. a scalar or ``None``) in ``map1``, the value from
    ``map2`` replaces it rather than raising an error.

    Parameters
    ----------
    map1 : MutableMapping
        First mapping to merge, with secondary priority. Modified in place.
    map2 : MutableMapping
        Second mapping to merge, with primary priority.

    Returns
    -------
    MutableMapping
        ``map1``, updated with the contents of ``map2``.
    """
    for key, value in map2.items():
        if isinstance(value, MutableMapping):
            # Recurse only into an existing mapping. A missing key or a non-mapping value cannot be merged into,
            # so start from an empty dictionary; this also means nested mappings of map2 are copied, not aliased.
            existing = map1.get(key)
            if not isinstance(existing, MutableMapping):
                existing = {}
            map1[key] = merge_mappings(existing, value)
        else:
            # Plain values from the second mapping simply add to / override the first
            map1[key] = value
    return map1


# Sylvia: Ruff says too complex but I think breaking this out would be more complex.
def dict_almost_equal(dict1: dict, dict2: dict, abs_tol: float = 1e-9): # noqa: C901
Expand Down Expand Up @@ -177,7 +211,11 @@ def write_config_with_comments(args=None) -> None:
logger_msg = "A sample configuration has been written to"
# If no config or default is requested we load the default_config.yaml
if args.config is None or args.config == "default":
config = (resources.files(__package__) / "default_config.yaml").read_text()
if args.simple:
config_path = resources.files(__package__) / "simple_config.yaml"
else:
config_path = resources.files(__package__) / "default_config.yaml"
config = config_path.read_text()
elif args.config == "topostats.mplstyle":
config = (resources.files(__package__) / "topostats.mplstyle").read_text()
logger_msg = "A sample matplotlibrc parameters file has been written to"
Expand Down
Loading

0 comments on commit eddd2ef

Please sign in to comment.