Skip to content

Commit

Permalink
Merge pull request #30 from asfadmin/rew/pr-6293-cmr-storage
Browse files Browse the repository at this point in the history
PR-6293 Add storage for querying CMR
  • Loading branch information
reweeden authored Dec 12, 2024
2 parents 3a22276 + a4c9f08 commit 0898c7d
Show file tree
Hide file tree
Showing 19 changed files with 882 additions and 138 deletions.
6 changes: 3 additions & 3 deletions mandible/jsonpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
def get(data: dict, path: str) -> list:
# Fall back to simple dot paths
if jsonpath_ng is None:
if path == "$":
return [data]

val = data
for part in path.split("."):
if part == "$":
continue

val = val[part]

return [val]
Expand Down
3 changes: 2 additions & 1 deletion mandible/metadata_mapper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .context import Context
from .format import Format
from .mapper import MetadataMapper, MetadataMapperError
from .source import ConfigSourceProvider, FileSource, PySourceProvider
from .source import FileSource
from .source_provider import ConfigSourceProvider, PySourceProvider

__all__ = [
"ConfigSourceProvider",
Expand Down
67 changes: 66 additions & 1 deletion mandible/metadata_mapper/context.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,73 @@
import dataclasses
from dataclasses import dataclass, field
from typing import Any

from mandible import jsonpath

from .exception import ContextValueError


@dataclass
class Context:
    """Container for the data that is available while metadata is being mapped.

    The whole object is converted with ``dataclasses.asdict`` before context
    value paths are resolved against it, so both attributes are addressable
    by path (e.g. ``$.meta.<key>``).
    """

    # One dict per file; the keys inside each dict are not fixed by this class.
    files: list[dict[str, Any]] = field(default_factory=list)
    # Free-form mapping of additional values. A fresh dict is created for
    # every instance, so instances never share state.
    meta: dict[str, Any] = field(default_factory=dict)


@dataclasses.dataclass
class ContextValue:
    """Placeholder that stands in for a value taken from the ``Context``.

    Wherever an instance appears inside an object passed to
    ``replace_context_values``, it is swapped for the single value found at
    ``path`` in the context.
    """

    # Path expression evaluated against the context converted to a dict.
    path: str


def replace_context_values(obj: Any, context: Context) -> Any:
    """Return a copy of ``obj`` with every ``ContextValue`` marker resolved.

    :param obj: arbitrary object that may contain ``ContextValue`` markers
    :param context: the context whose values are substituted for the markers
    :returns: the object with the markers replaced by context values
    :raises ContextValueError: if a marker's path cannot be resolved to
        exactly one value
    """
    # Paths are evaluated against plain dicts/lists, so flatten the context
    # dataclass once up front instead of on every recursive step.
    context_dict = dataclasses.asdict(context)
    return _replace_context_values(obj, context_dict)


def _replace_context_values(obj: Any, context_dict: dict) -> Any:
    """Recursively replace ``ContextValue`` markers found inside ``obj``.

    Dicts, lists and dataclass instances are rebuilt with their contents
    processed recursively; any other object is returned unchanged.

    :param obj: the object to process
    :param context_dict: the context as a plain dict, used as the root for
        path lookups
    :returns: the resolved value for a marker, a rebuilt container, or
        ``obj`` itself when there is nothing to replace
    :raises ContextValueError: if the path lookup fails, matches nothing, or
        matches more than one value
    """
    if isinstance(obj, ContextValue):
        try:
            result = jsonpath.get(context_dict, obj.path)
        except Exception as e:
            raise ContextValueError(
                f"jsonpath error for path {repr(obj.path)}: {e}",
            ) from e

        # The lookup yields a list of matches; a marker must resolve to
        # exactly one value.
        if not result:
            raise ContextValueError(
                f"context missing value for path {repr(obj.path)}",
            )
        if len(result) > 1:
            raise ContextValueError(
                f"context path {repr(obj.path)} returned more than "
                f"one value",
            )

        return result[0]

    if isinstance(obj, dict):
        return {
            k: _replace_context_values(v, context_dict)
            for k, v in obj.items()
        }

    if isinstance(obj, list):
        return [_replace_context_values(v, context_dict) for v in obj]

    # `is_dataclass` is also true for dataclass *types*; only instances can
    # be rebuilt.
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return dataclasses.replace(
            obj,
            **{
                field_obj.name: _replace_context_values(
                    getattr(obj, field_obj.name),
                    context_dict,
                )
                for field_obj in dataclasses.fields(obj)
                # `dataclasses.replace` raises ValueError when handed a
                # field declared with init=False, so leave those out.
                if field_obj.init
            },
        )

    return obj
19 changes: 19 additions & 0 deletions mandible/metadata_mapper/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,22 @@ def __str__(self) -> str:
debug = f" at {self.debug_path}"

return f"failed to process template{debug}: {self.msg}"


class ContextValueError(MetadataMapperError):
    """Raised when replacing context value markers fails.

    ``source_name`` is optional; when set, the string form of the error
    names the source that was being processed.
    """

    def __init__(
        self,
        msg: str,
        source_name: str = None,
    ):
        super().__init__(msg)
        self.source_name = source_name

    def __str__(self) -> str:
        # NOTE(review): `self.msg` is presumably set by the base class
        # constructor -- confirm against MetadataMapperError.
        debug = (
            f" for source {repr(self.source_name)}"
            if self.source_name is not None
            else ""
        )
        return f"failed to process context values{debug}: {self.msg}"
23 changes: 18 additions & 5 deletions mandible/metadata_mapper/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import logging
from typing import Any, Optional

from .context import Context
from .context import Context, replace_context_values
from .directive import DIRECTIVE_REGISTRY, TemplateDirective
from .exception import MetadataMapperError, TemplateError
from .source import Source, SourceProvider
from .exception import ContextValueError, MetadataMapperError, TemplateError
from .source import Source
from .source_provider import SourceProvider
from .types import Template

log = logging.getLogger(__name__)
Expand All @@ -29,8 +30,20 @@ def get_metadata(self, context: Context) -> Template:
else:
sources = {}

for name, source in sources.items():
try:
sources[name] = replace_context_values(source, context)
except ContextValueError as e:
e.source_name = name
raise
except Exception as e:
raise MetadataMapperError(
f"failed to inject context values into source "
f"{repr(name)}: {e}",
) from e

try:
self._cache_source_keys(context, sources)
self._prepare_directives(context, sources)
except TemplateError:
raise
except Exception as e:
Expand All @@ -56,7 +69,7 @@ def get_metadata(self, context: Context) -> Template:
f"failed to evaluate template: {e}"
) from e

def _cache_source_keys(self, context: Context, sources: dict[str, Source]):
def _prepare_directives(self, context: Context, sources: dict[str, Source]):
for value, debug_path in _walk_values(self.template):
if isinstance(value, dict):
directive_name = self._get_directive_name(value, debug_path)
Expand Down
120 changes: 3 additions & 117 deletions mandible/metadata_mapper/source.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,18 @@
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Optional, TypeVar
from typing import Any

from .context import Context
from .format import FORMAT_REGISTRY, Format
from .format import Format
from .key import Key
from .storage import STORAGE_REGISTRY, Storage
from .storage import Storage

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Generic type variable; lets `_instantiate_class` declare that it returns an
# instance of the class it was given.
T = TypeVar("T")

# Source classes keyed by class name; looked up when a source config names
# its "class".
# NOTE(review): presumably filled in as Source subclasses are defined --
# the registration code is not visible here, confirm.
SOURCE_REGISTRY: dict[str, type["Source"]] = {}

# Maps the *name* of a base class to the registry holding its
# implementations, so a registry can be selected from a type annotation.
REGISTRY_TYPE_MAP = {
"Format": FORMAT_REGISTRY,
"Storage": STORAGE_REGISTRY,
"Source": SOURCE_REGISTRY,
}


@dataclass
class Source(ABC):
Expand Down Expand Up @@ -65,110 +58,3 @@ def query_all_values(self, context: Context):
new_values
)
self._values.update(new_values)


class SourceProviderError(Exception):
    """Raised when a source provider cannot create its sources."""


class SourceProvider(ABC):
    """Abstract interface for objects that supply named sources."""

    @abstractmethod
    def get_sources(self) -> dict[str, Source]:
        """Return the available sources keyed by name."""


class PySourceProvider(SourceProvider):
    """Trivial provider wrapping sources that already exist as Python objects."""

    def __init__(self, sources: dict[str, Source]):
        """Store the given name-to-source mapping as is (no copy is made)."""
        self.sources = sources

    def get_sources(self) -> dict[str, Source]:
        """Return the very mapping that was passed to the constructor."""
        return self.sources


class ConfigSourceProvider(SourceProvider):
    """Provide sources from JSON object config

    The config maps each source name to a dict of constructor arguments. The
    optional ``"class"`` key selects the source class by name and defaults to
    ``FileSource``. Any argument that is itself a dict containing a
    ``"class"`` key is instantiated recursively, using the type annotation of
    the matching attribute on the parent class to pick the registry.
    """

    def __init__(self, config: dict):
        self.config = config

    def get_sources(self) -> dict[str, Source]:
        """Create one source per top level entry of the config.

        :returns: the created sources keyed by their config name
        :raises SourceProviderError: if any source cannot be created
        """
        return {
            key: self._create_source(key, config)
            for key, config in self.config.items()
        }

    def _create_source(self, key: str, config: dict) -> Source:
        """Create the source named ``key`` from its ``config`` dict."""
        cls_name = config.get("class") or FileSource.__name__
        cls = SOURCE_REGISTRY.get(cls_name)
        if cls is None:
            raise SourceProviderError(f"{key} invalid source type {repr(cls_name)}")

        try:
            return self._instantiate_class(cls, config)
        except Exception as e:
            # Any failure below this point (bad nested config, constructor
            # error, ...) is reported against the source being created.
            raise SourceProviderError(
                f"failed to create source {repr(key)}: {e}",
            ) from e

    def _create_object(
        self,
        parent_cls: type[Any],
        key: str,
        config: dict,
    ) -> Any:
        """Create the nested object configured for attribute ``key``.

        :param parent_cls: the class whose attribute is being populated
        :param key: name of the attribute on ``parent_cls``
        :param config: nested config dict; must contain a ``"class"`` key
        :raises SourceProviderError: if the class name is missing or unknown,
            the attribute is not annotated, or the class is not a subclass of
            the annotated type
        """
        cls_name = config.get("class")
        if cls_name is None:
            raise SourceProviderError(
                f"missing key 'class' in config {config}"
            )

        base_cls = self._get_annotation(parent_cls, key)
        if base_cls is None:
            raise SourceProviderError(
                f"unknown attribute {repr(key)} for class "
                f"{repr(parent_cls.__name__)}",
            )

        cls = self._get_class_from_registry(base_cls, cls_name)
        if cls is None:
            raise SourceProviderError(f"invalid {key} type {repr(cls_name)}")

        if not issubclass(cls, base_cls):
            raise SourceProviderError(
                f"invalid {key} type {repr(cls_name)} must be a subclass of "
                f"{repr(base_cls.__name__)}",
            )

        return self._instantiate_class(cls, config)

    def _get_annotation(self, parent_cls: type[Any], key: str) -> Optional[Any]:
        """Return the annotation of attribute ``key``, or None if it has none.

        A class' ``__annotations__`` does not reliably include annotations
        inherited from base classes, so every class in the MRO is consulted,
        most derived first.
        """
        # TODO(reweeden): As of python3.10, inspect.get_annotations(klass)
        # should be used instead here.
        for klass in parent_cls.__mro__:
            annotations = getattr(klass, "__annotations__", {})
            if key in annotations:
                return annotations[key]

        return None

    def _get_class_from_registry(
        self,
        base_cls: type[Any],
        cls_name: str,
    ) -> Optional[type[Any]]:
        """Look up ``cls_name`` in the registry belonging to ``base_cls``.

        Registries are keyed by base class name, so when ``base_cls`` itself
        has no registry entry its parent classes are tried in MRO order.

        :returns: the registered class, or None if no registry knows it
        """
        # `__mro__` starts with `base_cls` itself, so its own registry is
        # always tried first.
        for candidate_base_cls in base_cls.__mro__:
            cls = REGISTRY_TYPE_MAP.get(
                candidate_base_cls.__name__,
                {},
            ).get(cls_name)
            if cls is not None:
                return cls

        return None

    def _instantiate_class(self, cls: type[T], config: dict[str, Any]) -> T:
        """Call ``cls`` with every config entry except ``"class"`` as kwargs."""
        kwargs = {
            k: self._convert_arg(cls, k, v)
            for k, v in config.items()
            if k != "class"
        }

        return cls(**kwargs)

    def _convert_arg(self, parent_cls: type[Any], key: str, arg: Any) -> Any:
        """Instantiate ``arg`` if it is a nested class config, else pass it on."""
        if isinstance(arg, dict) and "class" in arg:
            return self._create_object(parent_cls, key, arg)

        return arg
Loading

0 comments on commit 0898c7d

Please sign in to comment.