diff --git a/fixture/pcodec/codec.00/config.json b/fixture/pcodec/codec.00/config.json index b9c25664..bf3a558d 100644 --- a/fixture/pcodec/codec.00/config.json +++ b/fixture/pcodec/codec.00/config.json @@ -1,8 +1,7 @@ { "delta_encoding_order": null, "equal_pages_up_to": 262144, - "float_mult_spec": "enabled", "id": "pcodec", - "int_mult_spec": "enabled", - "level": 8 + "level": 8, + "mode_spec": "auto" } \ No newline at end of file diff --git a/fixture/pcodec/codec.01/config.json b/fixture/pcodec/codec.01/config.json index f40486a3..d4715fa5 100644 --- a/fixture/pcodec/codec.01/config.json +++ b/fixture/pcodec/codec.01/config.json @@ -1,8 +1,7 @@ { "delta_encoding_order": null, "equal_pages_up_to": 262144, - "float_mult_spec": "enabled", "id": "pcodec", - "int_mult_spec": "enabled", - "level": 1 + "level": 1, + "mode_spec": "auto" } \ No newline at end of file diff --git a/fixture/pcodec/codec.02/config.json b/fixture/pcodec/codec.02/config.json index e2e186e6..0fe7a745 100644 --- a/fixture/pcodec/codec.02/config.json +++ b/fixture/pcodec/codec.02/config.json @@ -1,8 +1,7 @@ { "delta_encoding_order": null, "equal_pages_up_to": 262144, - "float_mult_spec": "enabled", "id": "pcodec", - "int_mult_spec": "enabled", - "level": 5 + "level": 5, + "mode_spec": "auto" } \ No newline at end of file diff --git a/fixture/pcodec/codec.03/config.json b/fixture/pcodec/codec.03/config.json index d6b837a0..727ee624 100644 --- a/fixture/pcodec/codec.03/config.json +++ b/fixture/pcodec/codec.03/config.json @@ -1,8 +1,7 @@ { "delta_encoding_order": null, "equal_pages_up_to": 262144, - "float_mult_spec": "enabled", "id": "pcodec", - "int_mult_spec": "enabled", - "level": 9 + "level": 9, + "mode_spec": "auto" } \ No newline at end of file diff --git a/fixture/pcodec/codec.04/config.json b/fixture/pcodec/codec.04/config.json index f6ece7c0..669bd782 100644 --- a/fixture/pcodec/codec.04/config.json +++ b/fixture/pcodec/codec.04/config.json @@ -1,8 +1,7 @@ { "delta_encoding_order": null, "equal_pages_up_to": 262144, - "float_mult_spec": "disabled", "id": "pcodec", - "int_mult_spec": "disabled", - "level": 8 + "level": 8, + "mode_spec": "classic" } \ No newline at end of file diff --git a/fixture/pcodec/codec.05/config.json b/fixture/pcodec/codec.05/config.json index 5d68ba26..fec50f2c 100644 --- a/fixture/pcodec/codec.05/config.json +++ b/fixture/pcodec/codec.05/config.json @@ -1,8 +1,7 @@ { "delta_encoding_order": null, "equal_pages_up_to": 300, - "float_mult_spec": "enabled", "id": "pcodec", - "int_mult_spec": "enabled", - "level": 8 + "level": 8, + "mode_spec": "auto" } \ No newline at end of file diff --git a/numcodecs/pcodec.py b/numcodecs/pcodec.py index 10273aed..b345bd2e 100644 --- a/numcodecs/pcodec.py +++ b/numcodecs/pcodec.py @@ -5,7 +5,7 @@ from numcodecs.compat import ensure_contiguous_ndarray try: - from pcodec import standalone, ChunkConfig, PagingSpec + from pcodec import standalone, ChunkConfig, PagingSpec, ModeSpec except ImportError: # pragma: no cover standalone = None @@ -20,8 +20,8 @@ class PCodec(numcodecs.abc.Codec): See `PCodec Repo `_ for more information. - PCodec supports only the following numerical dtypes: uint32, unit64, int32, - int64, float32, and float64. + PCodec supports only the following numerical dtypes: uint16, uint32, uint64, + int16, int32, int64, float16, float32, and float64. Parameters ---------- @@ -31,14 +31,11 @@ class PCodec(numcodecs.abc.Codec): delta_encoding_order : init or None Either a delta encoding level from 0-7 or None. If set to None, pcodec will try to infer the optimal delta encoding order. - int_mult_spec : {'enabled', 'disabled'} - If enabled, pcodec will consider using int mult mode, which can - substantially improve compression ratio but decrease speed in some cases - for integer types. - float_mult_spec : {'enabled', 'disabled'} - If enabled, pcodec will consider using float mult mode, which can - substantially improve compression ratio but decrease speed in some cases - for float types. + mode_spec : {'auto', 'classic'} + Configures whether Pcodec should try to infer the best "mode" or + structure of the data (e.g. approximate multiples of 0.1) to improve + compression ratio, or skip this step and just use the numbers as-is + (Classic mode). equal_pages_up_to : int Divide the chunk into equal pages of up to this many numbers. """ @@ -49,9 +46,9 @@ def __init__( self, level: int = 8, delta_encoding_order: Optional[int] = None, - int_mult_spec: Literal["enabled", "disabled"] = "enabled", - float_mult_spec: Literal["enabled", "disabled"] = "enabled", equal_pages_up_to: int = 262144, + # TODO one day, add support for the Try* mode specs + mode_spec: Literal['auto', 'classic'] = 'auto', ): if standalone is None: # pragma: no cover raise ImportError("pcodec must be installed to use the PCodec codec.") @@ -60,20 +57,25 @@ def __init__( # match other codecs self.level = level self.delta_encoding_order = delta_encoding_order - self.int_mult_spec = int_mult_spec - self.float_mult_spec = float_mult_spec self.equal_pages_up_to = equal_pages_up_to + self.mode_spec = mode_spec def encode(self, buf): buf = ensure_contiguous_ndarray(buf) + match self.mode_spec: + case 'auto': + mode_spec = ModeSpec.auto() + case 'classic': + mode_spec = ModeSpec.classic() + case _: + raise ValueError(f"unknown value for mode_spec: {self.mode_spec}") paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to) config = ChunkConfig( compression_level=self.level, delta_encoding_order=self.delta_encoding_order, - int_mult_spec=self.int_mult_spec, - float_mult_spec=self.float_mult_spec, + mode_spec=mode_spec, paging_spec=paging_spec, ) return standalone.simple_compress(buf, config) diff --git a/numcodecs/tests/test_pcodec.py b/numcodecs/tests/test_pcodec.py index 98919d5c..49460901 100644 --- a/numcodecs/tests/test_pcodec.py +++ b/numcodecs/tests/test_pcodec.py @@ -24,7 +24,7 @@ PCodec(level=1), PCodec(level=5), PCodec(level=9), - PCodec(float_mult_spec="disabled", int_mult_spec="disabled"), + PCodec(mode_spec='classic'), PCodec(equal_pages_up_to=300), ] @@ -57,10 +57,15 @@ def test_config(): check_config(codec) +def test_invalid_config_error(): + with pytest.raises(ValueError): + codec = PCodec(mode_spec='bogus') + check_encode_decode_array_to_bytes(arrays[0], codec) + + def test_repr(): check_repr( - "PCodec(delta_encoding_order=None, equal_pages_up_to=262144, float_mult_spec='enabled', " - "int_mult_spec='enabled', level=3)" + "PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')" ) diff --git a/pyproject.toml b/pyproject.toml index 9826168f..319b29d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ zfpy = [ "numpy<2.0.0", ] pcodec = [ - "pcodec>=0.1.0", + "pcodec>=0.2.0", ] [tool.setuptools]