Merge pull request #35 from asfadmin/rew/additional-lints

Add additional lint checks to GitHub Actions
asfadmin · Jan 31, 2025 · 8ab205a · 8ab205a
2 parents 4acdd4e + a43491e
commit 8ab205a
Show file tree

Hide file tree

Showing 21 changed files with 224 additions and 126 deletions.
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -1,10 +1,12 @@
 name: Lint
+permissions:
+  contents: read
 
 on:
   pull_request:
 
 jobs:
-  lint:
+  flake8:
     runs-on: ubuntu-latest
 
     steps:
@@ -15,4 +17,26 @@ jobs:
       - uses: TrueBrain/actions-flake8@v2
         with:
           flake8_version: 6.0.0
-          plugins: flake8-isort==6.1.1 flake8-quotes==3.4.0
+          plugins: flake8-isort==6.1.1 flake8-quotes==3.4.0 flake8-commas==4.0.0
+
+  # Static type checking of the `mandible` package with mypy.
+  mypy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the version as a string; an unquoted value
+          # is parsed as a float (e.g. 3.10 would silently become 3.1).
+          python-version: "3.9"
+
+      # Install the project with the `mypy` dependency group and all extras.
+      - run: |
+          pip install poetry
+          poetry install --with=mypy -E all
+
+      - run: |
+          poetry run mypy \
+            --check-untyped-defs \
+            --disable-error-code=import-untyped \
+            --strict-equality \
+            --warn-redundant-casts \
+            --warn-unused-ignores \
+            mandible
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -1,4 +1,6 @@
 name: Test
+permissions:
+  contents: read
 
 on:
   push:

diff --git a/mandible/metadata_mapper/builder.py b/mandible/metadata_mapper/builder.py
@@ -103,7 +103,7 @@ def mapped(
     directive_name = Mapped.directive_name
     assert directive_name is not None
 
-    params = {
+    params: dict[str, Any] = {
         "source": source,
         "key": key,
     }

diff --git a/mandible/metadata_mapper/directive/reformatted.py b/mandible/metadata_mapper/directive/reformatted.py
@@ -4,8 +4,9 @@
 
 from mandible.metadata_mapper.exception import MetadataMapperError
 from mandible.metadata_mapper.format import FORMAT_REGISTRY
+from mandible.metadata_mapper.types import Key
 
-from .directive import Key, TemplateDirective, get_key
+from .directive import TemplateDirective, get_key
 
 
 @dataclass

diff --git a/mandible/metadata_mapper/exception.py b/mandible/metadata_mapper/exception.py
@@ -1,3 +1,6 @@
+from typing import Optional
+
+
 class MetadataMapperError(Exception):
     """A generic error raised by the MetadataMapper"""
 
@@ -8,7 +11,7 @@ def __init__(self, msg: str):
 class TemplateError(MetadataMapperError):
     """An error that occurred while processing the metadata template."""
 
-    def __init__(self, msg: str, debug_path: str = None):
+    def __init__(self, msg: str, debug_path: Optional[str] = None):
         super().__init__(msg)
         self.debug_path = debug_path
 
@@ -26,7 +29,7 @@ class ContextValueError(MetadataMapperError):
     def __init__(
         self,
         msg: str,
-        source_name: str = None,
+        source_name: Optional[str] = None,
     ):
         super().__init__(msg)
         self.source_name = source_name

diff --git a/mandible/metadata_mapper/format/__init__.py b/mandible/metadata_mapper/format/__init__.py
@@ -11,12 +11,12 @@
 try:
     from .h5 import H5
 except ImportError:
-    from .format import H5
+    from .format import H5  # type: ignore
 
 try:
     from .xml import Xml
 except ImportError:
-    from .format import Xml
+    from .format import Xml  # type: ignore
 
 
 __all__ = (

diff --git a/mandible/metadata_mapper/format/format.py b/mandible/metadata_mapper/format/format.py
@@ -1,11 +1,12 @@
 import contextlib
+import inspect
 import json
 import re
 import zipfile
 from abc import ABC, abstractmethod
-from collections.abc import Iterable
+from collections.abc import Generator, Iterable
 from dataclasses import dataclass
-from typing import IO, Any, TypeVar
+from typing import IO, Any, Generic, TypeVar
 
 from mandible import jsonpath
 from mandible.metadata_mapper.key import RAISE_EXCEPTION, Key
@@ -50,7 +51,7 @@ def get_value(self, file: IO[bytes], key: Key) -> Any:
 
 
 @dataclass
-class FileFormat(Format, ABC, register=False):
+class FileFormat(Format, Generic[T], ABC, register=False):
     """A Format for querying files from a standard data file.
 
     Simple, single format data types such as 'json' that can be queried
@@ -76,7 +77,7 @@ def get_value(self, file: IO[bytes], key: Key) -> Any:
         with self.parse_data(file) as data:
             return self._eval_key_wrapper(data, key)
 
-    def _eval_key_wrapper(self, data, key: Key) -> Any:
+    def _eval_key_wrapper(self, data: T, key: Key) -> Any:
         try:
             return self.eval_key(data, key)
         except KeyError as e:
@@ -116,24 +117,26 @@ def eval_key(data: T, key: Key) -> Any:
 
 
 @dataclass
-class _PlaceholderBase(FileFormat, register=False):
+class _PlaceholderBase(FileFormat[None], register=False):
     """
     Base class for defining placeholder implementations for classes that
     require extra dependencies to be installed
     """
     def __init__(self, dep: str):
+        # Instantiation always fails; the message names the missing
+        # dependency (`dep`) and the concrete placeholder class.
         raise Exception(
             f"{dep} must be installed to use the {self.__class__.__name__} "
-            "format class"
+            "format class",
         )
 
     @staticmethod
-    def parse_data(file: IO[bytes]) -> contextlib.AbstractContextManager[T]:
-        pass
+    def parse_data(file: IO[bytes]) -> contextlib.AbstractContextManager[None]:
+        # __init__ always raises
+        # NOTE(review): "unreachable" assumes this is only called through an
+        # instance; as a staticmethod it can still be called on the class.
+        raise RuntimeError("Unreachable!")
 
     @staticmethod
-    def eval_key(data: T, key: Key):
-        pass
+    def eval_key(data: None, key: Key):
+        # __init__ always raises
+        # NOTE(review): same assumption as parse_data above.
+        raise RuntimeError("Unreachable!")
 
 
 @dataclass
@@ -151,10 +154,10 @@ def __init__(self):
 # Define formats that don't require extra dependencies
 
 @dataclass
-class Json(FileFormat):
+class Json(FileFormat[dict]):
     @staticmethod
     @contextlib.contextmanager
-    def parse_data(file: IO[bytes]) -> dict:
+    def parse_data(file: IO[bytes]) -> Generator[dict]:
         yield json.load(file)
 
     @staticmethod
@@ -237,20 +240,26 @@ def _matches_filters(self, zipinfo: zipfile.ZipInfo) -> bool:
         return True
 
 
+# Names of the public (non-underscore) data descriptors found on
+# `zipfile.ZipInfo` via introspection, computed once at import time.
+# The `ZipInfo` format's `parse_data` reads each of these attributes from
+# every entry returned by `ZipFile.infolist()`.
+ZIP_INFO_ATTRS = [
+    name
+    for name, _ in inspect.getmembers(zipfile.ZipInfo, inspect.isdatadescriptor)
+    if not name.startswith("_")
+]
+
+
 @dataclass
-class ZipInfo(FileFormat):
+class ZipInfo(FileFormat[dict]):
     """Query Zip headers and directory information."""
 
     @staticmethod
     @contextlib.contextmanager
-    def parse_data(file: IO[bytes]) -> dict:
+    def parse_data(file: IO[bytes]) -> Generator[dict]:
         with zipfile.ZipFile(file, "r") as zf:
             yield {
                 "infolist": [
                     {
                         k: getattr(info, k)
-                        for k in info.__slots__
-                        if not k.startswith("_")
+                        for k in ZIP_INFO_ATTRS
                     }
                     for info in zf.infolist()
                 ],

diff --git a/mandible/metadata_mapper/format/h5.py b/mandible/metadata_mapper/format/h5.py
@@ -11,13 +11,13 @@
 
 
 @dataclass
-class H5(FileFormat):
+class H5(FileFormat[Any]):
+    """File format backed by `h5py`; keys index into the opened file."""
+
     @staticmethod
     def parse_data(file: IO[bytes]) -> contextlib.AbstractContextManager[Any]:
+        # Opened read-only ("r"); the `h5py.File` object itself is returned
+        # as the context manager.
         return h5py.File(file, "r")
 
     @staticmethod
-    def eval_key(data, key: Key) -> Any:
+    def eval_key(data: Any, key: Key) -> Any:
+        # Look up `key.key` in the opened file and index the result with
+        # `[()]`. `normalize` is not shown here; presumably it converts the
+        # h5py/numpy value to plain Python types (TODO confirm).
         return normalize(data[key.key][()])
 
 

diff --git a/mandible/metadata_mapper/format/xml.py b/mandible/metadata_mapper/format/xml.py
@@ -1,6 +1,7 @@
 import contextlib
+from collections.abc import Generator, Iterable
 from dataclasses import dataclass
-from typing import IO, Any
+from typing import IO, Any, Union
 
 from lxml import etree
 
@@ -10,16 +11,34 @@
 
 
 @dataclass
-class Xml(FileFormat):
+class Xml(FileFormat[etree._ElementTree]):
+    """A Format that queries XML documents with XPath expressions."""
+
     @staticmethod
     @contextlib.contextmanager
-    def parse_data(file: IO[bytes]) -> Any:
+    def parse_data(file: IO[bytes]) -> Generator[etree._ElementTree]:
+        """Parse `file` with lxml and yield the resulting element tree."""
         yield etree.parse(file)
 
     @staticmethod
-    def eval_key(data: etree.ElementTree, key: Key) -> Any:
+    def eval_key(data: etree._ElementTree, key: Key) -> Any:
+        """Evaluate the XPath expression `key.key` against the parsed tree.
+
+        The root element's namespace map is passed to the XPath evaluation.
+        A list-like result is converted item by item with `convert_result`
+        and handed to `key.resolve_list_match`. Any other result (such as
+        the number produced by `count(...)` or the string produced by
+        `string(...)`) is returned unchanged.
+        """
         nsmap = data.getroot().nsmap
-        elements = data.xpath(key.key, namespaces=nsmap)
-        values = [element.text for element in elements]
+        xpath_result = data.xpath(
+            key.key,
+            # Lxml type stubs don't handle None key for default namespaces
+            namespaces=nsmap,  # type: ignore
+        )
+        # A `str` result (e.g. from `string(...)`) is itself Iterable, so it
+        # must be excluded explicitly or it would be split into characters.
+        if (
+            isinstance(xpath_result, Iterable)
+            and not isinstance(xpath_result, (str, bytes))
+        ):
+            values = [convert_result(item) for item in xpath_result]
 
-        return key.resolve_list_match(values)
+            return key.resolve_list_match(values)
+
+        # Xpath supports functions such as `count` and `string` that can
+        # result in `data.xpath` returning something other than a list of
+        # matches.
+        return xpath_result
+
+
+def convert_result(result: Union[etree._Element, int, str, bytes, tuple]):
+    """Convert a single XPath match into a plain value.
+
+    Elements are replaced by their `.text`; `int`, `str` and `bytes` items
+    are returned unchanged. Any other type (including `tuple`, which the
+    annotation admits) raises `TypeError` naming the offending class.
+    """
+    if isinstance(result, etree._Element):
+        return result.text
+    if isinstance(result, (int, str, bytes)):
+        return result
+
+    raise TypeError(f"Unsupported type {repr(result.__class__.__name__)}")