Concatenation of virtual datasets fails due to missing Chunk Manager #382

Open · observingClouds opened this issue Jan 22, 2025 · 12 comments

@observingClouds
What I did
I created kerchunk reference files following #381 and tried to combine them via xr.merge.

import xarray as xr
import tempfile
import json
from pathlib import Path
import virtualizarr as vz
import os

# Create xarray datasets
ds1 = xr.Dataset(
    {
        "a": (("x", "y"), [[1, 2], [3, 4]]),
        "b": (("x", "y"), [[10, 20], [30, 40]]),
    },
    coords={"x": [10, 20], "y": [1, 2]},
)

ds2 = xr.Dataset(
    {
        "c": (("x", "y"), [[5, 6], [7, 8]]),
        "d": (("x", "y"), [[50, 60], [70, 80]]),
    },
    coords={"x": [1, 2], "y": [1, 2]},
)

ref1 = ds1.virtualize.to_kerchunk()
ref2 = ds2.virtualize.to_kerchunk()

## Work around virtualizarr not yet supporting inlined references; opening them raises:
## "NotImplementedError: Outlining all references as inlined data is not yet supported. [ToDo]"
tempdir1 = Path(tempfile.mkdtemp())  # mkdtemp: the directories must persist beyond this statement
tempdir2 = Path(tempfile.mkdtemp())
def outline_references(ref: dict, folder: Path) -> dict:
    """
    Virtualizarr currently does not support inlined references.
    To open references with virtualizarr, the inlined values must be written to files.
    Except for the .zarray, .zattrs and .zgroup keys, all references are written to disk.
    """
    refs = ref["refs"]
    for k, v in refs.items():
        if os.path.basename(k).startswith('.'):
            continue
        elif isinstance(v, str):
            file = folder / k
            os.makedirs(os.path.dirname(file), exist_ok=True)
            with open(file, "w") as f:
                f.write(v)
            # kerchunk references are [path, offset, length]; use len(v) for the
            # byte count (str.__sizeof__() reports the Python object size instead)
            refs[k] = [str(file), 0, len(v)]
    return ref

ref1 = outline_references(ref1, tempdir1)
ref2 = outline_references(ref2, tempdir2)


## Write references to disk (open_virtual_dataset expects a string)
with open("ref1.json", "w") as f:
    json.dump(ref1, f)

with open("ref2.json", "w") as f:
    json.dump(ref2, f)

# Note this section requires the modification in #381 
vds1 = vz.open_virtual_dataset("ref1.json", filetype='kerchunk')
vds2 = vz.open_virtual_dataset("ref2.json", filetype='kerchunk')

xr.merge([vds1, vds2])

What happened

TypeError                                 Traceback (most recent call last)
Cell In[20], line 66
     63 vds1 = vz.open_virtual_dataset("ref1.json", filetype='kerchunk')
     64 vds2 = vz.open_virtual_dataset("ref2.json", filetype='kerchunk')
---> 66 xr.merge([vds1, vds2])

File ~/virtualizarr/lib/python3.10/site-packages/xarray/core/merge.py:976, in merge(objects, compat, join, fill_value, combine_attrs)
    973         obj = obj.to_dataset()
    974     dict_like_objects.append(obj)
--> 976 merge_result = merge_core(
    977     dict_like_objects,
    978     compat,
    979     join,
    980     combine_attrs=combine_attrs,
    981     fill_value=fill_value,
    982 )
    983 return Dataset._construct_direct(**merge_result._asdict())

File ~/virtualizarr/lib/python3.10/site-packages/xarray/core/merge.py:701, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value, skip_align_args)
    699 collected = collect_variables_and_indexes(aligned, indexes=indexes)
    700 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
--> 701 variables, out_indexes = merge_collected(
    702     collected, prioritized, compat=compat, combine_attrs=combine_attrs
    703 )
    705 dims = calculate_dimensions(variables)
    707 coord_names, noncoord_names = determine_coords(coerced)

File ~/virtualizarr/lib/python3.10/site-packages/xarray/core/merge.py:290, in merge_collected(grouped, prioritized, compat, combine_attrs, equals)
    288 variables = [variable for variable, _ in elements_list]
    289 try:
--> 290     merged_vars[name] = unique_variable(
    291         name, variables, compat, equals.get(name, None)
    292     )
    293 except MergeError:
    294     if compat != "minimal":
    295         # we need more than "minimal" compatibility (for which
    296         # we drop conflicting coordinates)

File ~/virtualizarr/lib/python3.10/site-packages/xarray/core/merge.py:137, in unique_variable(name, variables, compat, equals)
    133         break
    135 if equals is None:
    136     # now compare values with minimum number of computes
--> 137     out = out.compute()
    138     for var in variables[1:]:
    139         equals = getattr(out, compat)(var)

File ~/virtualizarr/lib/python3.10/site-packages/xarray/core/variable.py:1026, in Variable.compute(self, **kwargs)
   1008 """Manually trigger loading of this variable's data from disk or a
   1009 remote source into memory and return a new variable. The original is
   1010 left unaltered.
   (...)
   1023 dask.array.compute
   1024 """
   1025 new = self.copy(deep=False)
-> 1026 return new.load(**kwargs)

File ~/virtualizarr/lib/python3.10/site-packages/xarray/core/variable.py:1004, in Variable.load(self, **kwargs)
    987 def load(self, **kwargs):
    988     """Manually trigger loading of this variable's data from disk or a
    989     remote source into memory and return this variable.
    990 
   (...)
   1002     dask.array.compute
   1003     """
-> 1004     self._data = to_duck_array(self._data, **kwargs)
   1005     return self

File ~/virtualizarr/lib/python3.10/site-packages/xarray/namedarray/pycompat.py:129, in to_duck_array(data, **kwargs)
    126 from xarray.namedarray.parallelcompat import get_chunked_array_type
    128 if is_chunked_array(data):
--> 129     chunkmanager = get_chunked_array_type(data)
    130     loaded_data, *_ = chunkmanager.compute(data, **kwargs)  # type: ignore[var-annotated]
    131     return loaded_data

File ~/virtualizarr/lib/python3.10/site-packages/xarray/namedarray/parallelcompat.py:158, in get_chunked_array_type(*args)
    152 selected = [
    153     chunkmanager
    154     for chunkmanager in chunkmanagers.values()
    155     if chunkmanager.is_chunked_array(chunked_arr)
    156 ]
    157 if not selected:
--> 158     raise TypeError(
    159         f"Could not find a Chunk Manager which recognises type {type(chunked_arr)}"
    160     )
    161 elif len(selected) >= 2:
    162     raise TypeError(f"Multiple ChunkManagers recognise type {type(chunked_arr)}")

TypeError: Could not find a Chunk Manager which recognises type <class 'virtualizarr.manifests.array.ManifestArray'>

What I expected
I expected the virtual equivalent of the result from the "real" datasets:

>>> xr.merge([ds1, ds2])
<xarray.Dataset> Size: 304B
Dimensions:  (x: 4, y: 2)
Coordinates:
  * x        (x) int64 32B 1 2 10 20
  * y        (y) int64 16B 1 2
Data variables:
    a        (x, y) float64 64B nan nan nan nan 1.0 2.0 3.0 4.0
    b        (x, y) float64 64B nan nan nan nan 10.0 20.0 30.0 40.0
    c        (x, y) float64 64B 5.0 6.0 7.0 8.0 nan nan nan nan
    d        (x, y) float64 64B 50.0 60.0 70.0 80.0 nan nan nan nan
Attributes:
    coordinates:  x y

Environment
vz.__version__ = 1.2.1.dev19+g0d2d6ab

@norlandrhagen (Collaborator)

Hey there @observingClouds, thanks for surfacing all these bugs! It's super helpful.

This one feels similar to #141.

I tried adding compat='override' to your merge call, i.e. xr.merge([vds1, vds2], compat='override'), and it seemed to work. Definitely not a clear error though!

@observingClouds (Author) commented Jan 22, 2025

Thanks for your quick response. Sorry that I missed the referenced issue. How do I now load this data?

I would like to write this joined virtual dataset to disk as a reference file and load it with xarray, like

vds = xr.merge([vds1, vds2], compat='override')
vds.virtualize.to_kerchunk("ref.json", format="json")
xr.open_zarr("reference://", storage_options={'fo':"ref.json"})

This, however, leads to:

ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.
Full traceback
----> 1 xr.open_zarr("reference://", storage_options={'fo':"ref.json"})

File ~/virtualizarr/lib/python3.13/site-packages/xarray/backends/zarr.py:1491, in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, chunk_store, storage_options, decode_timedelta, use_cftime, zarr_version, zarr_format, use_zarr_fill_value_as_mask, chunked_array_type, from_array_kwargs, **kwargs)
   1477     raise TypeError(
   1478         "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys())
   1479     )
   1481 backend_kwargs = {
   1482     "synchronizer": synchronizer,
   1483     "consolidated": consolidated,
   (...)
   1488     "zarr_format": zarr_format,
   1489 }
-> 1491 ds = open_dataset(
   1492     filename_or_obj=store,
   1493     group=group,
   1494     decode_cf=decode_cf,
   1495     mask_and_scale=mask_and_scale,
   1496     decode_times=decode_times,
   1497     concat_characters=concat_characters,
   1498     decode_coords=decode_coords,
   1499     engine="zarr",
   1500     chunks=chunks,
   1501     drop_variables=drop_variables,
   1502     chunked_array_type=chunked_array_type,
   1503     from_array_kwargs=from_array_kwargs,
   1504     backend_kwargs=backend_kwargs,
   1505     decode_timedelta=decode_timedelta,
   1506     use_cftime=use_cftime,
   1507     zarr_version=zarr_version,
   1508     use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
   1509 )
   1510 return ds

File ~/virtualizarr/lib/python3.13/site-packages/xarray/backends/api.py:679, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    667 decoders = _resolve_decoders_kwargs(
    668     decode_cf,
    669     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    675     decode_coords=decode_coords,
    676 )
    678 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 679 backend_ds = backend.open_dataset(
    680     filename_or_obj,
    681     drop_variables=drop_variables,
    682     **decoders,
    683     **kwargs,
    684 )
    685 ds = _dataset_from_backend_dataset(
    686     backend_ds,
    687     filename_or_obj,
   (...)
    697     **kwargs,
    698 )
    699 return ds

File ~/virtualizarr/lib/python3.13/site-packages/xarray/backends/zarr.py:1581, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format, store, engine, use_zarr_fill_value_as_mask, cache_members)
   1579 store_entrypoint = StoreBackendEntrypoint()
   1580 with close_on_error(store):
-> 1581     ds = store_entrypoint.open_dataset(
   1582         store,
   1583         mask_and_scale=mask_and_scale,
   1584         decode_times=decode_times,
   1585         concat_characters=concat_characters,
   1586         decode_coords=decode_coords,
   1587         drop_variables=drop_variables,
   1588         use_cftime=use_cftime,
   1589         decode_timedelta=decode_timedelta,
   1590     )
   1591 return ds

File ~/virtualizarr/lib/python3.13/site-packages/xarray/backends/store.py:59, in StoreBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
     45 encoding = filename_or_obj.get_encoding()
     47 vars, attrs, coord_names = conventions.decode_cf_variables(
     48     vars,
     49     attrs,
   (...)
     56     decode_timedelta=decode_timedelta,
     57 )
---> 59 ds = Dataset(vars, attrs=attrs)
     60 ds = ds.set_coords(coord_names.intersection(vars))
     61 ds.set_close(filename_or_obj.close)

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/dataset.py:747, in Dataset.__init__(self, data_vars, coords, attrs)
    744 if isinstance(coords, Dataset):
    745     coords = coords._variables
--> 747 variables, coord_names, dims, indexes, _ = merge_data_and_coords(
    748     data_vars, coords
    749 )
    751 self._attrs = dict(attrs) if attrs else None
    752 self._close = None

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/dataset.py:460, in merge_data_and_coords(data_vars, coords)
    456     coords = create_coords_with_default_indexes(coords, data_vars)
    458 # exclude coords from alignment (all variables in a Coordinates object should
    459 # already be aligned together) and use coordinates' indexes to align data_vars
--> 460 return merge_core(
    461     [data_vars, coords],
    462     compat="broadcast_equals",
    463     join="outer",
    464     explicit_coords=tuple(coords),
    465     indexes=coords.xindexes,
    466     priority_arg=1,
    467     skip_align_args=[1],
    468 )

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/merge.py:699, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value, skip_align_args)
    696 for pos, obj in skip_align_objs:
    697     aligned.insert(pos, obj)
--> 699 collected = collect_variables_and_indexes(aligned, indexes=indexes)
    700 prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
    701 variables, out_indexes = merge_collected(
    702     collected, prioritized, compat=compat, combine_attrs=combine_attrs
    703 )

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/merge.py:362, in collect_variables_and_indexes(list_of_mappings, indexes)
    360     append(name, variable, indexes[name])
    361 elif variable.dims == (name,):
--> 362     idx, idx_vars = create_default_index_implicit(variable)
    363     append_all(idx_vars, {k: idx for k in idx_vars})
    364 else:

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexes.py:1425, in create_default_index_implicit(dim_variable, all_variables)
   1423 else:
   1424     dim_var = {name: dim_variable}
-> 1425     index = PandasIndex.from_variables(dim_var, options={})
   1426     index_vars = index.create_variables(dim_var)
   1428 return index, index_vars

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexes.py:654, in PandasIndex.from_variables(cls, variables, options)
    651     if level is not None:
    652         data = var._data.array.get_level_values(level)
--> 654 obj = cls(data, dim, coord_dtype=var.dtype)
    655 assert not isinstance(obj.index, pd.MultiIndex)
    656 # Rename safely
    657 # make a shallow copy: cheap and because the index name may be updated
    658 # here or in other constructors (cannot use pd.Index.rename as this
    659 # constructor is also called from PandasMultiIndex)

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexes.py:589, in PandasIndex.__init__(self, array, dim, coord_dtype, fastpath)
    587     index = array
    588 else:
--> 589     index = safe_cast_to_index(array)
    591 if index.name is None:
    592     # make a shallow copy: cheap and because the index name may be updated
    593     # here or in other constructors (cannot use pd.Index.rename as this
    594     # constructor is also called from PandasMultiIndex)
    595     index = index.copy()

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexes.py:469, in safe_cast_to_index(array)
    459             emit_user_level_warning(
    460                 (
    461                     "`pandas.Index` does not support the `float16` dtype."
   (...)
    465                 category=DeprecationWarning,
    466             )
    467             kwargs["dtype"] = "float64"
--> 469     index = pd.Index(np.asarray(array), **kwargs)
    471 return _maybe_cast_to_cftimeindex(index)

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexing.py:509, in ExplicitlyIndexed.__array__(self, dtype, copy)
    504 def __array__(
    505     self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None
    506 ) -> np.ndarray:
    507     # Leave casting to an array up to the underlying array type.
    508     if Version(np.__version__) >= Version("2.0.0"):
--> 509         return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
    510     else:
    511         return np.asarray(self.get_duck_array(), dtype=dtype)

File ~/virtualizarr/lib/python3.13/site-packages/xarray/coding/variables.py:81, in _ElementwiseFunctionArray.get_duck_array(self)
     80 def get_duck_array(self):
---> 81     return self.func(self.array.get_duck_array())

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexing.py:652, in LazilyIndexedArray.get_duck_array(self)
    648     array = apply_indexer(self.array, self.key)
    649 else:
    650     # If the array is not an ExplicitlyIndexedNDArrayMixin,
    651     # it may wrap a BackendArray so use its __getitem__
--> 652     array = self.array[self.key]
    654 # self.array[self.key] is now a numpy array when
    655 # self.array is a BackendArray subclass
    656 # and self.key is BasicIndexer((slice(None, None, None),))
    657 # so we need the explicit check for ExplicitlyIndexed
    658 if isinstance(array, ExplicitlyIndexed):

File ~/virtualizarr/lib/python3.13/site-packages/xarray/backends/zarr.py:227, in ZarrArrayWrapper.__getitem__(self, key)
    225 elif isinstance(key, indexing.OuterIndexer):
    226     method = self._oindex
--> 227 return indexing.explicit_indexing_adapter(
    228     key, array.shape, indexing.IndexingSupport.VECTORIZED, method
    229 )

File ~/virtualizarr/lib/python3.13/site-packages/xarray/core/indexing.py:1013, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
    991 """Support explicit indexing by delegating to a raw indexing method.
    992 
    993 Outer and/or vectorized indexers are supported by indexing a second time
   (...)
   1010 Indexing result, in the form of a duck numpy-array.
   1011 """
   1012 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
-> 1013 result = raw_indexing_method(raw_key.tuple)
   1014 if numpy_indices.tuple:
   1015     # index the loaded np.ndarray
   1016     indexable = NumpyIndexingAdapter(result)

File ~/virtualizarr/lib/python3.13/site-packages/xarray/backends/zarr.py:217, in ZarrArrayWrapper._getitem(self, key)
    216 def _getitem(self, key):
--> 217     return self._array[key]

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/array.py:2365, in Array.__getitem__(self, selection)
   2363     return self.vindex[cast(CoordinateSelection | MaskSelection, selection)]
   2364 elif is_pure_orthogonal_indexing(pure_selection, self.ndim):
-> 2365     return self.get_orthogonal_selection(pure_selection, fields=fields)
   2366 else:
   2367     return self.get_basic_selection(cast(BasicSelection, pure_selection), fields=fields)

File ~/virtualizarr/lib/python3.13/site-packages/zarr/_compat.py:43, in _deprecate_positional_args.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     41 extra_args = len(args) - len(all_args)
     42 if extra_args <= 0:
---> 43     return f(*args, **kwargs)
     45 # extra_args > 0
     46 args_msg = [
     47     f"{name}={arg}"
     48     for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:], strict=False)
     49 ]

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/array.py:2807, in Array.get_orthogonal_selection(self, selection, out, fields, prototype)
   2805     prototype = default_buffer_prototype()
   2806 indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid)
-> 2807 return sync(
   2808     self._async_array._get_selection(
   2809         indexer=indexer, out=out, fields=fields, prototype=prototype
   2810     )
   2811 )

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/sync.py:142, in sync(coro, loop, timeout)
    139 return_result = next(iter(finished)).result()
    141 if isinstance(return_result, BaseException):
--> 142     raise return_result
    143 else:
    144     return return_result

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/sync.py:98, in _runner(coro)
     93 """
     94 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
     95 exception, the exception will be returned.
     96 """
     97 try:
---> 98     return await coro
     99 except Exception as ex:
    100     return ex

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/array.py:1230, in AsyncArray._get_selection(self, indexer, prototype, out, fields)
   1222     out_buffer = prototype.nd_buffer.create(
   1223         shape=indexer.shape,
   1224         dtype=out_dtype,
   1225         order=self._config.order,
   1226         fill_value=self.metadata.fill_value,
   1227     )
   1228 if product(indexer.shape) > 0:
   1229     # reading chunks and decoding them
-> 1230     await self.codec_pipeline.read(
   1231         [
   1232             (
   1233                 self.store_path / self.metadata.encode_chunk_key(chunk_coords),
   1234                 self.metadata.get_chunk_spec(
   1235                     chunk_coords, self._config, prototype=prototype
   1236                 ),
   1237                 chunk_selection,
   1238                 out_selection,
   1239             )
   1240             for chunk_coords, chunk_selection, out_selection in indexer
   1241         ],
   1242         out_buffer,
   1243         drop_axes=indexer.drop_axes,
   1244     )
   1245 return out_buffer.as_ndarray_like()

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/codec_pipeline.py:453, in BatchedCodecPipeline.read(self, batch_info, out, drop_axes)
    447 async def read(
    448     self,
    449     batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
    450     out: NDBuffer,
    451     drop_axes: tuple[int, ...] = (),
    452 ) -> None:
--> 453     await concurrent_map(
    454         [
    455             (single_batch_info, out, drop_axes)
    456             for single_batch_info in batched(batch_info, self.batch_size)
    457         ],
    458         self.read_batch,
    459         config.get("async.concurrency"),
    460     )

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/common.py:68, in concurrent_map(items, func, limit)
     65     async with sem:
     66         return await func(*item)
---> 68 return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/common.py:66, in concurrent_map.<locals>.run(item)
     64 async def run(item: tuple[Any]) -> V:
     65     async with sem:
---> 66         return await func(*item)

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/codec_pipeline.py:270, in BatchedCodecPipeline.read_batch(self, batch_info, out, drop_axes)
    261 else:
    262     chunk_bytes_batch = await concurrent_map(
    263         [
    264             (byte_getter, array_spec.prototype)
   (...)
    268         config.get("async.concurrency"),
    269     )
--> 270     chunk_array_batch = await self.decode_batch(
    271         [
    272             (chunk_bytes, chunk_spec)
    273             for chunk_bytes, (_, chunk_spec, _, _) in zip(
    274                 chunk_bytes_batch, batch_info, strict=False
    275             )
    276         ],
    277     )
    278     for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip(
    279         chunk_array_batch, batch_info, strict=False
    280     ):
    281         if chunk_array is not None:

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/codec_pipeline.py:177, in BatchedCodecPipeline.decode_batch(self, chunk_bytes_and_specs)
    172     chunk_bytes_batch = await bb_codec.decode(
    173         zip(chunk_bytes_batch, chunk_spec_batch, strict=False)
    174     )
    176 ab_codec, chunk_spec_batch = ab_codec_with_spec
--> 177 chunk_array_batch = await ab_codec.decode(
    178     zip(chunk_bytes_batch, chunk_spec_batch, strict=False)
    179 )
    181 for aa_codec, chunk_spec_batch in aa_codecs_with_spec[::-1]:
    182     chunk_array_batch = await aa_codec.decode(
    183         zip(chunk_array_batch, chunk_spec_batch, strict=False)
    184     )

File ~/virtualizarr/lib/python3.13/site-packages/zarr/abc/codec.py:129, in BaseCodec.decode(self, chunks_and_specs)
    113 async def decode(
    114     self,
    115     chunks_and_specs: Iterable[tuple[CodecOutput | None, ArraySpec]],
    116 ) -> Iterable[CodecInput | None]:
    117     """Decodes a batch of chunks.
    118     Chunks can be None in which case they are ignored by the codec.
    119 
   (...)
    127     Iterable[CodecInput | None]
    128     """
--> 129     return await _batching_helper(self._decode_single, chunks_and_specs)

File ~/virtualizarr/lib/python3.13/site-packages/zarr/abc/codec.py:407, in _batching_helper(func, batch_info)
    403 async def _batching_helper(
    404     func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]],
    405     batch_info: Iterable[tuple[CodecInput | None, ArraySpec]],
    406 ) -> list[CodecOutput | None]:
--> 407     return await concurrent_map(
    408         list(batch_info),
    409         _noop_for_none(func),
    410         config.get("async.concurrency"),
    411     )

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/common.py:68, in concurrent_map(items, func, limit)
     65     async with sem:
     66         return await func(*item)
---> 68 return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])

File ~/virtualizarr/lib/python3.13/site-packages/zarr/core/common.py:66, in concurrent_map.<locals>.run(item)
     64 async def run(item: tuple[Any]) -> V:
     65     async with sem:
---> 66         return await func(*item)

File ~/virtualizarr/lib/python3.13/site-packages/zarr/abc/codec.py:420, in _noop_for_none.<locals>.wrap(chunk, chunk_spec)
    418 if chunk is None:
    419     return None
--> 420 return await func(chunk, chunk_spec)

File ~/virtualizarr/lib/python3.13/site-packages/zarr/codecs/_v2.py:51, in V2Codec._decode_single(self, chunk_bytes, chunk_spec)
     49 if chunk_spec.dtype != object:
     50     try:
---> 51         chunk = chunk.view(chunk_spec.dtype)
     52     except TypeError:
     53         # this will happen if the dtype of the chunk
     54         # does not match the dtype of the array spec i.g. if
     55         # the dtype of the chunk_spec is a string dtype, but the chunk
     56         # is an object array. In this case, we need to convert the object
     57         # array to the correct dtype.
     59         chunk = np.array(chunk).astype(chunk_spec.dtype)

ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.

It seems like the dtypes (float64 vs int64) already differ after merging (as do the dimension sizes):

#xr.merge([ds1, ds2], compat='override')
<xarray.Dataset> Size: 304B
Dimensions:  (x: 4, y: 2)
Coordinates:
  * x        (x) int64 32B 1 2 10 20
  * y        (y) int64 16B 1 2
Data variables:
    a        (x, y) float64 64B nan nan nan nan 1.0 2.0 3.0 4.0
    b        (x, y) float64 64B nan nan nan nan 10.0 20.0 30.0 40.0
    c        (x, y) float64 64B 5.0 6.0 7.0 8.0 nan nan nan nan
    d        (x, y) float64 64B 50.0 60.0 70.0 80.0 nan nan nan nan
Attributes:
    coordinates:  x y

#xr.merge([vds1, vds2], compat='override')
<xarray.Dataset> Size: 160B
Dimensions:  (x: 2, y: 2)
Coordinates:
    y        (y) int64 16B ManifestArray<shape=(2,), dtype=int64, chunks=(2,)>
    x        (x) int64 16B ManifestArray<shape=(2,), dtype=int64, chunks=(2,)>
Data variables:
    b        (x, y) int64 32B ManifestArray<shape=(2, 2), dtype=int64, chunks...
    a        (x, y) int64 32B ManifestArray<shape=(2, 2), dtype=int64, chunks...
    d        (x, y) int64 32B ManifestArray<shape=(2, 2), dtype=int64, chunks...
    c        (x, y) int64 32B ManifestArray<shape=(2, 2), dtype=int64, chunks...

@norlandrhagen (Collaborator)

> Thanks for your quick response. Sorry that I missed the referenced issue. How do I now load this data?

Np and no worries! There is tons of churn in issues.

Hmm, I don't recall seeing that error before. It's not a real solution, but adding loadable_variables to the open_virtual_dataset call did the trick on my end. (Note: This was the Kerchunk parquet format).

from virtualizarr import open_virtual_dataset

## Write references to disk (open_virtual_dataset expects a string)
with open("ref1.json", "w") as f:
    json.dump(ref1, f)

with open("ref2.json", "w") as f:
    json.dump(ref2, f)

# Note this section requires the modification in #381 
vds1 = open_virtual_dataset("ref1.json", filetype='kerchunk', loadable_variables=['x','y'])
vds2 = open_virtual_dataset("ref2.json", filetype='kerchunk', loadable_variables=['x','y'])

combined_vds = xr.merge([vds1, vds2],compat='override')
combined_vds.virtualize.to_kerchunk('combined.parquet', format='parquet')
vds = xr.open_dataset("combined.parquet", engine='kerchunk')

<xarray.Dataset> Size: 128B
Dimensions:  (x: 2, y: 2)
Dimensions without coordinates: x, y
Data variables:
    a        (x, y) float64 32B ...
    b        (x, y) float64 32B ...
    c        (x, y) float64 32B ...
    d        (x, y) float64 32B ...

Stepping back a bit: while it would be good to figure out what is going wrong here, would writing to Icechunk instead of one of the legacy Kerchunk formats make sense for your use case?
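
For context, writing to Icechunk would look roughly like the sketch below. Treat it as an illustration only: the icechunk API has been changing between releases, so the names used here (local_filesystem_storage, Repository, writable_session) follow recent icechunk versions and may need adjusting, and the store path is made up.

import icechunk

# create a local Icechunk repository to hold the virtual references
storage = icechunk.local_filesystem_storage("./combined_icechunk")
repo = icechunk.Repository.create(storage)

# write the merged virtual dataset as a commit on the "main" branch
session = repo.writable_session("main")
combined_vds.virtualize.to_icechunk(session.store)
session.commit("add combined virtual dataset")

# read it back with xarray through the zarr engine
ds = xr.open_zarr(repo.readonly_session(branch="main").store, consolidated=False)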

@observingClouds (Author)

Thanks @norlandrhagen for troubleshooting. I will need to read up on Icechunk.

In the meantime, please note that the dimensions still differ from those of the "real" datasets:

Dimensions: (x: 2, y: 2)

vs.

Dimensions: (x: 4, y: 2)

@TomNicholas (Member) commented Jan 23, 2025

I'll try to look at this properly soon and run your example, but I suspect you might need join='override' and coords='minimal' too.
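
For the concat path, those kwargs would look something like the sketch below. Note that xr.merge itself has no coords argument; vds_a and vds_b are hypothetical virtual datasets that share the same variables along the concatenation dimension.

combined = xr.concat(
    [vds_a, vds_b],
    dim="x",
    coords="minimal",
    compat="override",
    join="override",
)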

@dcherian

Yes, this is pydata/xarray#8778 yet again.

@observingClouds (Author) commented Jan 28, 2025

Okay, I am not getting the correct results yet. I now tried xr.combine_by_coords(), and it also does not work for the virtual datasets:

>>> xr.combine_by_coords([vds1,vds2])
ValueError: Every dimension requires a corresponding 1D coordinate and index for inferring concatenation order but the coordinate 'x' has no corresponding index
Full traceback
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[21], line 1
----> 1 xr.combine_by_coords([vds1,vds2])

File /etc/ecmwf/nfs/dh2_home_b/dnk9255/envs/virtual/lib/python3.13/site-packages/xarray/core/combine.py:973, in combine_by_coords(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs)
    969     grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
    971     # Perform the multidimensional combine on each group of data variables
    972     # before merging back together
--> 973     concatenated_grouped_by_data_vars = tuple(
    974         _combine_single_variable_hypercube(
    975             tuple(datasets_with_same_vars),
    976             fill_value=fill_value,
    977             data_vars=data_vars,
    978             coords=coords,
    979             compat=compat,
    980             join=join,
    981             combine_attrs=combine_attrs,
    982         )
    983         for vars, datasets_with_same_vars in grouped_by_vars
    984     )
    986 return merge(
    987     concatenated_grouped_by_data_vars,
    988     compat=compat,
   (...)
    991     combine_attrs=combine_attrs,
    992 )

File /etc/ecmwf/nfs/dh2_home_b/dnk9255/envs/virtual/lib/python3.13/site-packages/xarray/core/combine.py:974, in <genexpr>(.0)
    969     grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
    971     # Perform the multidimensional combine on each group of data variables
    972     # before merging back together
    973     concatenated_grouped_by_data_vars = tuple(
--> 974         _combine_single_variable_hypercube(
    975             tuple(datasets_with_same_vars),
    976             fill_value=fill_value,
    977             data_vars=data_vars,
    978             coords=coords,
    979             compat=compat,
    980             join=join,
    981             combine_attrs=combine_attrs,
    982         )
    983         for vars, datasets_with_same_vars in grouped_by_vars
    984     )
    986 return merge(
    987     concatenated_grouped_by_data_vars,
    988     compat=compat,
   (...)
    991     combine_attrs=combine_attrs,
    992 )

File /etc/ecmwf/nfs/dh2_home_b/dnk9255/envs/virtual/lib/python3.13/site-packages/xarray/core/combine.py:634, in _combine_single_variable_hypercube(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)
    628 if len(datasets) == 0:
    629     raise ValueError(
    630         "At least one Dataset is required to resolve variable names "
    631         "for combined hypercube."
    632     )
--> 634 combined_ids, concat_dims = _infer_concat_order_from_coords(list(datasets))
    636 if fill_value is None:
    637     # check that datasets form complete hypercube
    638     _check_shape_tile_ids(combined_ids)

File /etc/ecmwf/nfs/dh2_home_b/dnk9255/envs/virtual/lib/python3.13/site-packages/xarray/core/combine.py:109, in _infer_concat_order_from_coords(datasets)
    103 if any(index is None for index in indexes):
    104     error_msg = (
    105         f"Every dimension requires a corresponding 1D coordinate "
    106         f"and index for inferring concatenation order but the "
    107         f"coordinate '{dim}' has no corresponding index"
    108     )
--> 109     raise ValueError(error_msg)
    111 # TODO (benbovy, flexible indexes): support flexible indexes?
    112 indexes = [index.to_pandas_index() for index in indexes]

ValueError: Every dimension requires a corresponding 1D coordinate and index for inferring concatenation order but the coordinate 'x' has no corresponding index

In [22]: combined_vds = xr.merge([vds1, vds2],compat='override', join='override', coords='minimal')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[22], line 1
----> 1 combined_vds = xr.merge([vds1, vds2],compat='override', join='override', coords='minimal')

TypeError: merge() got an unexpected keyword argument 'coords'

In [23]: xr.combine_by_coords([vds1,vds2])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
(same traceback as above)
ValueError: Every dimension requires a corresponding 1D coordinate and index for inferring concatenation order but the coordinate 'x' has no corresponding index

However, for the "real" dataset it works as expected:

 >>> xr.combine_by_coords([ds1,ds2])
<xarray.Dataset> Size: 304B
Dimensions:  (x: 4, y: 2)
Coordinates:
  * x        (x) int64 32B 1 2 10 20
  * y        (y) int64 16B 1 2
Data variables:
    a        (x, y) float64 64B nan nan nan nan 1.0 2.0 3.0 4.0
    b        (x, y) float64 64B nan nan nan nan 10.0 20.0 30.0 40.0
    c        (x, y) float64 64B 5.0 6.0 7.0 8.0 nan nan nan nan
    d        (x, y) float64 64B 50.0 60.0 70.0 80.0 nan nan nan nan
Attributes:
    coordinates:  x y

@TomNicholas (Member)

You need to pass all the same keyword arguments to combine_by_coords too. Also combine_by_coords needs the dataset to have indexes in order to work, but the default index creation behavior is slightly different for open_virtual_dataset as compared to open_dataset. I hope to align these soon.
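
Something along these lines, i.e. the same compat/join/coords combination passed through (unlike merge, combine_by_coords does accept a coords argument):

combined = xr.combine_by_coords(
    [vds1, vds2],
    compat="override",
    join="override",
    coords="minimal",
)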

@observingClouds (Author)

Why should I use join='override' anyway? I do want to merge the datasets and am OK with generating NaN sections.

>>> xr.combine_by_coords([ds1, ds2], join='override', coords='minimal')
<xarray.Dataset> Size: 160B
Dimensions:  (x: 2, y: 2)
Coordinates:
  * x        (x) int64 16B 10 20
  * y        (y) int64 16B 1 2
Data variables:
    a        (x, y) int64 32B 1 2 3 4
    b        (x, y) int64 32B 10 20 30 40
    c        (x, y) int64 32B 5 6 7 8
    d        (x, y) int64 32B 50 60 70 80
Attributes:
    coordinates:  x y

vs

>>> xr.combine_by_coords([ds1, ds2], join='outer')
<xarray.Dataset> Size: 304B
Dimensions:  (x: 4, y: 2)
Coordinates:
  * x        (x) int64 32B 1 2 10 20
  * y        (y) int64 16B 1 2
Data variables:
    a        (x, y) float64 64B nan nan nan nan 1.0 2.0 3.0 4.0
    b        (x, y) float64 64B nan nan nan nan 10.0 20.0 30.0 40.0
    c        (x, y) float64 64B 5.0 6.0 7.0 8.0 nan nan nan nan
    d        (x, y) float64 64B 50.0 60.0 70.0 80.0 nan nan nan nan
Attributes:
    coordinates:  x y

@maxrjones (Member)

> Okay, reading the documentation, indices are created when providing loadable_variables. Doing so, however, drops the coordinates:

This was fixed by #396

@observingClouds (Author) commented Jan 29, 2025

Thanks for referencing @maxrjones!

Providing the loadable variables with

vds1 = open_virtual_dataset("ref1.json", filetype='kerchunk', loadable_variables=['x','y'])

results in a NotImplementedError now (version: 1.2.1.dev27+gfa3aeea):

Traceback
File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/virtualizarr/backend.py:203, in open_virtual_dataset(filepath, filetype, group, drop_variables, loadable_variables, decode_times, cftime_variables, indexes, virtual_array_class, virtual_backend_kwargs, reader_options, backend)
    200 if backend_cls is None:
    201     raise NotImplementedError(f"Unsupported file type: {filetype.name}")
--> 203 vds = backend_cls.open_virtual_dataset(
    204     filepath,
    205     group=group,
    206     drop_variables=drop_variables,
    207     loadable_variables=loadable_variables,
    208     decode_times=decode_times,
    209     indexes=indexes,
    210     virtual_backend_kwargs=virtual_backend_kwargs,
    211     reader_options=reader_options,
    212 )
    214 return vds

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/virtualizarr/readers/kerchunk.py:47, in KerchunkVirtualBackend.open_virtual_dataset(filepath, group, drop_variables, loadable_variables, decode_times, indexes, virtual_backend_kwargs, reader_options)
     41 drop_variables, loadable_variables = check_for_collisions(
     42     drop_variables=drop_variables,
     43     loadable_variables=loadable_variables,
     44 )
     46 if loadable_variables or indexes or decode_times:
---> 47     raise NotImplementedError()
     49 fs = _FsspecFSFromFilepath(filepath=filepath, reader_options=reader_options)
     51 # The kerchunk .parquet storage format isn't actually a parquet, but a directory that contains named parquets for each group/variable.

NotImplementedError: 

EDIT: I thought it worked now, but I had used the actual dataset instead of the virtual one...

@observingClouds (Author) commented Jan 29, 2025

I now tried to set the indices explicitly:

>>> vds1['x'] = ds1.x
>>> vds1['y'] = ds1.y
>>> vds2['x'] = ds2.x
>>> vds2['y'] = ds2.y
>>> vds1
<xarray.Dataset> Size: 96B
Dimensions:  (x: 2, y: 2)
Coordinates:
  * x        (x) int64 16B 10 20
  * y        (y) int64 16B 1 2
Data variables:
    a        (x, y) int64 32B ManifestArray<shape=(2, 2), dtype=int64, chunks...
    b        (x, y) int64 32B ManifestArray<shape=(2, 2), dtype=int64, chunks...
>>> xr.combine_by_coords([vds1, vds2], join='outer')

This results in:

NotImplementedError: Doesn't support slicing with (array([-1, -1,  0,  1]), slice(None, None, None))
Traceback
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[55], line 1
----> 1 xr.combine_by_coords([vds1,vds2], join='outer')

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/combine.py:986, in combine_by_coords(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs)
    971     # Perform the multidimensional combine on each group of data variables
    972     # before merging back together
    973     concatenated_grouped_by_data_vars = tuple(
    974         _combine_single_variable_hypercube(
    975             tuple(datasets_with_same_vars),
   (...)
    983         for vars, datasets_with_same_vars in grouped_by_vars
    984     )
--> 986 return merge(
    987     concatenated_grouped_by_data_vars,
    988     compat=compat,
    989     fill_value=fill_value,
    990     join=join,
    991     combine_attrs=combine_attrs,
    992 )

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/merge.py:976, in merge(objects, compat, join, fill_value, combine_attrs)
    973         obj = obj.to_dataset()
    974     dict_like_objects.append(obj)
--> 976 merge_result = merge_core(
    977     dict_like_objects,
    978     compat,
    979     join,
    980     combine_attrs=combine_attrs,
    981     fill_value=fill_value,
    982 )
    983 return Dataset._construct_direct(**merge_result._asdict())

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/merge.py:692, in merge_core(objects, compat, join, combine_attrs, priority_arg, explicit_coords, indexes, fill_value, skip_align_args)
    689 skip_align_objs = [(pos, objects.pop(pos)) for pos in skip_align_args]
    691 coerced = coerce_pandas_values(objects)
--> 692 aligned = deep_align(
    693     coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value
    694 )
    696 for pos, obj in skip_align_objs:
    697     aligned.insert(pos, obj)

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:947, in deep_align(objects, join, copy, indexes, exclude, raise_on_invalid, fill_value)
    944     else:
    945         out.append(variables)
--> 947 aligned = align(
    948     *targets,
    949     join=join,
    950     copy=copy,
    951     indexes=indexes,
    952     exclude=exclude,
    953     fill_value=fill_value,
    954 )
    956 for position, key, aligned_obj in zip(positions, keys, aligned, strict=True):
    957     if key is no_key:

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:883, in align(join, copy, indexes, exclude, fill_value, *objects)
    687 """
    688 Given any number of Dataset and/or DataArray objects, returns new
    689 objects with aligned indexes and dimension sizes.
   (...)
    873 
    874 """
    875 aligner = Aligner(
    876     objects,
    877     join=join,
   (...)
    881     fill_value=fill_value,
    882 )
--> 883 aligner.align()
    884 return aligner.results

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:583, in Aligner.align(self)
    581     self.results = self.objects
    582 else:
--> 583     self.reindex_all()

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:558, in Aligner.reindex_all(self)
    557 def reindex_all(self) -> None:
--> 558     self.results = tuple(
    559         self._reindex_one(obj, matching_indexes)
    560         for obj, matching_indexes in zip(
    561             self.objects, self.objects_matching_indexes, strict=True
    562         )
    563     )

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:559, in <genexpr>(.0)
    557 def reindex_all(self) -> None:
    558     self.results = tuple(
--> 559         self._reindex_one(obj, matching_indexes)
    560         for obj, matching_indexes in zip(
    561             self.objects, self.objects_matching_indexes, strict=True
    562         )
    563     )

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:547, in Aligner._reindex_one(self, obj, matching_indexes)
    544 new_indexes, new_variables = self._get_indexes_and_vars(obj, matching_indexes)
    545 dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes)
--> 547 return obj._reindex_callback(
    548     self,
    549     dim_pos_indexers,
    550     new_variables,
    551     new_indexes,
    552     self.fill_value,
    553     self.exclude_dims,
    554     self.exclude_vars,
    555 )

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/dataset.py:3634, in Dataset._reindex_callback(self, aligner, dim_pos_indexers, variables, indexes, fill_value, exclude_dims, exclude_vars)
   3628 else:
   3629     to_reindex = {
   3630         k: v
   3631         for k, v in self.variables.items()
   3632         if k not in variables and k not in exclude_vars
   3633     }
-> 3634     reindexed_vars = alignment.reindex_variables(
   3635         to_reindex,
   3636         dim_pos_indexers,
   3637         copy=aligner.copy,
   3638         fill_value=fill_value,
   3639         sparse=aligner.sparse,
   3640     )
   3641     new_variables.update(reindexed_vars)
   3642     new_coord_names = self._coord_names | set(new_indexes)

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/alignment.py:79, in reindex_variables(variables, dim_pos_indexers, copy, fill_value, sparse)
     76 needs_masking = any(d in masked_dims for d in var.dims)
     78 if needs_masking:
---> 79     new_var = var._getitem_with_mask(indxr, fill_value=fill_value_)
     80 elif all(is_full_slice(k) for k in indxr):
     81     # no reindexing necessary
     82     # here we need to manually deal with copying data, since
     83     # we neither created a new ndarray nor used fancy indexing
     84     new_var = var.copy(deep=copy)

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/variable.py:863, in Variable._getitem_with_mask(self, key, fill_value)
    860     actual_indexer = indexer
    862 indexable = as_indexable(self._data)
--> 863 data = indexing.apply_indexer(indexable, actual_indexer)
    865 mask = indexing.create_mask(indexer, self.shape, data)
    866 # we need to invert the mask in order to pass data first. This helps
    867 # pint to choose the correct unit
    868 # TODO: revert after https://github.com/hgrecco/pint/issues/1019 is fixed

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/indexing.py:1031, in apply_indexer(indexable, indexer)
   1029     return indexable.vindex[indexer]
   1030 elif isinstance(indexer, OuterIndexer):
-> 1031     return indexable.oindex[indexer]
   1032 else:
   1033     return indexable[indexer]

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/indexing.py:369, in IndexCallable.__getitem__(self, key)
    368 def __getitem__(self, key: Any) -> Any:
--> 369     return self.getter(key)

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/xarray/core/indexing.py:1508, in NumpyIndexingAdapter._oindex_get(self, indexer)
   1506 def _oindex_get(self, indexer: OuterIndexer):
   1507     key = _outer_to_numpy_indexer(indexer, self.array.shape)
-> 1508     return self.array[key]

File ~/Documents/GitProjects/venv/virtuali/lib/python3.13/site-packages/virtualizarr/manifests/array.py:226, in ManifestArray.__getitem__(self, key)
    224     return self
    225 else:
--> 226     raise NotImplementedError(f"Doesn't support slicing with {indexer}")

NotImplementedError: Doesn't support slicing with (array([-1, -1,  0,  1]), slice(None, None, None))
