From 2db6a18d6082d5df148b03a5f54e8259c9658e4f Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 12:14:25 +0200
Subject: [PATCH 01/19] Remove empty line at end of file

---
 tests/data/validation/validate_data/simple_validation.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/data/validation/validate_data/simple_validation.yaml b/tests/data/validation/validate_data/simple_validation.yaml
index 2694b20f..354621e3 100644
--- a/tests/data/validation/validate_data/simple_validation.yaml
+++ b/tests/data/validation/validate_data/simple_validation.yaml
@@ -3,4 +3,3 @@
    year: 2010
    upper_bound: 2.5
    lower_bound: 1
-

From 17017e8678fb82b15081cc3d9351caabcbd561a2 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:00:41 +0200
Subject: [PATCH 02/19] Make sure that validation-with-codelist passes if no
 criteria are given

---
 nomenclature/processor/iamc.py                             | 5 +++--
 tests/data/validation/validate_data/simple_validation.yaml | 3 +--
 tests/test_validate_data.py                                | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/nomenclature/processor/iamc.py b/nomenclature/processor/iamc.py
index 8dc3936a..dc3549a2 100644
--- a/nomenclature/processor/iamc.py
+++ b/nomenclature/processor/iamc.py
@@ -25,9 +25,10 @@ def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
         # check for filter-items that are not defined in the codelists
         for dimension in IAMC_IDX:
             codelist = getattr(dsd, dimension, None)
-            if codelist is None:
+            # no validation if codelist is not defined or filter-item is None
+            if codelist is None or getattr(self, dimension) is None:
                 continue
-            if invalid := codelist.validate_items(getattr(self, dimension, [])):
+            if invalid := codelist.validate_items(getattr(self, dimension)):
                 error_msg += (
                     f"The following {dimension}s are not defined in the "
                     f"DataStructureDefinition:\n   {', '.join(invalid)}\n"
diff --git a/tests/data/validation/validate_data/simple_validation.yaml b/tests/data/validation/validate_data/simple_validation.yaml
index 354621e3..a7ec5ee9 100644
--- a/tests/data/validation/validate_data/simple_validation.yaml
+++ b/tests/data/validation/validate_data/simple_validation.yaml
@@ -1,5 +1,4 @@
- - region: World
-   variable: Final Energy
+ - variable: Final Energy
    year: 2010
    upper_bound: 2.5
    lower_bound: 1
diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 4d339750..67dec617 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -12,7 +12,6 @@ def test_DataValidator_from_file():
         **{
             "criteria_items": [
                 {
-                    "region": ["World"],
                     "variable": "Final Energy",
                     "year": [2010],
                     "upper_bound": 2.5,

From 31f7baaa89e7b672e4ffbb76995c551e04ee1deb Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:01:08 +0200
Subject: [PATCH 03/19] Harmonize notation

---
 nomenclature/processor/data_validator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index dfc5158c..1c9936f7 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -34,9 +34,9 @@ def apply(self):
 
     def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
         errors = ErrorCollector(description=f"in file '{self.file}'")
-        for data in self.criteria_items:
+        for item in self.criteria_items:
             try:
-                data.validate_with_definition(dsd)
+                item.validate_with_definition(dsd)
             except ValueError as value_error:
                 errors.append(value_error)
         if errors:

From 402817d530f8033b8af2bfb3495ef44668c40501 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:05:32 +0200
Subject: [PATCH 04/19] Add `criteria` attribute

---
 nomenclature/processor/iamc.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nomenclature/processor/iamc.py b/nomenclature/processor/iamc.py
index dc3549a2..163b22a0 100644
--- a/nomenclature/processor/iamc.py
+++ b/nomenclature/processor/iamc.py
@@ -1,4 +1,4 @@
-from typing import List, Tuple, Any
+from typing import List
 from pydantic import BaseModel, field_validator
 
 from pyam import IAMC_IDX
@@ -19,6 +19,10 @@ class IamcDataFilter(BaseModel):
     def single_input_to_list(cls, v):
         return v if isinstance(v, list) else [v]
 
+    @property
+    def criteria(self):
+        return dict(item for item in self.model_dump().items() if item[1] is not None)
+
     def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
         error_msg = ""
 

From 433415d2d2e3f9a9eedd1b798d6ef563400a0316 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:12:38 +0200
Subject: [PATCH 05/19] Add initial `apply` implementation

---
 nomenclature/processor/data_validator.py      | 34 +++++++++++++++++--
 .../definitions/variable/variable.yaml        |  2 ++
 tests/test_validate_data.py                   |  8 +++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index 1c9936f7..3920c31d 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -1,7 +1,11 @@
+import logging
 from pathlib import Path
 from typing import List, Union
 
+import pandas as pd
 import yaml
+from pyam import IamDataFrame
+from pyam.logging import adjust_log_level
 
 from nomenclature.definition import DataStructureDefinition
 from nomenclature.error import ErrorCollector
@@ -9,6 +13,8 @@
 from nomenclature.processor import Processor
 from nomenclature.processor.utils import get_relative_path
 
+logger = logging.getLogger(__name__)
+
 
 class DataValidationCriteria(IamcDataFilter):
     """Data validation criteria"""
@@ -29,8 +35,32 @@ def from_file(cls, file: Union[Path, str]) -> "DataValidator":
             content = yaml.safe_load(f)
         return cls(file=file, criteria_items=content)
 
-    def apply(self):
-        pass
+    def apply(self, df: IamDataFrame) -> IamDataFrame:
+        failed_validation_list = []
+        error = False
+
+        with adjust_log_level():
+            for item in self.criteria_items:
+                failed_validation = df.validate(**item.criteria)
+                if failed_validation is not None:
+                    for direction in ["upper_bound", "lower_bound"]:
+                        if getattr(item, direction) is not None:
+                            failed_validation[direction] = getattr(item, direction)
+                    failed_validation_list.append(failed_validation)
+
+            if failed_validation_list:
+                logger.error(
+                    "Failed data validation (file %s):\n%s",
+                    get_relative_path(self.file),
+                    pd.concat(failed_validation_list),
+                )
+                error = True
+
+            if error:
+                raise ValueError(
+                    "Data validation failed. Please check the log for details."
+                )
+        return df
 
     def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
         errors = ErrorCollector(description=f"in file '{self.file}'")
diff --git a/tests/data/validation/definitions/variable/variable.yaml b/tests/data/validation/definitions/variable/variable.yaml
index 59c7edf8..0d6094ea 100644
--- a/tests/data/validation/definitions/variable/variable.yaml
+++ b/tests/data/validation/definitions/variable/variable.yaml
@@ -2,6 +2,8 @@
     unit: EJ/yr
 - Primary Energy:
     unit: EJ/yr
+- Primary Energy|Coal:
+    unit: EJ/yr
 - Emissions|CO2:
     unit: Mt CO2/yr
 - Emissions|CH4:
diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 67dec617..7cf2973b 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -56,3 +56,11 @@ def test_DataValidator_validate_with_definition_raises(dimension, match):
         dimensions=[dim for dim in ["region", "variable"] if dim != dimension],
     )
     assert data_validator.validate_with_definition(dsd) is None
+
+
+def test_DataValidator_apply_no_matching_data(simple_df):
+    data_validator = DataValidator.from_file(
+        DATA_VALIDATION_TEST_DIR / "simple_validation.yaml"
+    )
+    # no data matches validation criteria, `apply()` passes and returns unchanged object
+    assert data_validator.apply(simple_df) == simple_df

From 1dcfa11a894ecd37e04de91154e19eddb3b5823f Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:20:18 +0200
Subject: [PATCH 06/19] Add test data for showing how to fail validation

---
 .../validate_data/validate_data_failing.yaml          | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 tests/data/validation/validate_data/validate_data_failing.yaml

diff --git a/tests/data/validation/validate_data/validate_data_failing.yaml b/tests/data/validation/validate_data/validate_data_failing.yaml
new file mode 100644
index 00000000..e576e3a4
--- /dev/null
+++ b/tests/data/validation/validate_data/validate_data_failing.yaml
@@ -0,0 +1,11 @@
+ # 2005 value passes the validation, but the 2010 value does not
+ - variable: Primary Energy
+   upper_bound: 5.
+# variable exists only for 'scen_a'
+ - variable: Primary Energy|Coal
+   lower_bound: 2
+# both upper and lower bound are triggered
+ - variable: Primary Energy
+   year: 2005
+   upper_bound: 1.9
+   lower_bound: 1.1

From cb37ff37d84c0c7057deff338c14b5ff6333272f Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:24:57 +0200
Subject: [PATCH 07/19] Add a test for showing how to fail validation

---
 ...alidate_data_failing.yaml => validate_data_fails.yaml} | 0
 tests/test_validate_data.py                               | 8 ++++++++
 2 files changed, 8 insertions(+)
 rename tests/data/validation/validate_data/{validate_data_failing.yaml => validate_data_fails.yaml} (100%)

diff --git a/tests/data/validation/validate_data/validate_data_failing.yaml b/tests/data/validation/validate_data/validate_data_fails.yaml
similarity index 100%
rename from tests/data/validation/validate_data/validate_data_failing.yaml
rename to tests/data/validation/validate_data/validate_data_fails.yaml
diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 7cf2973b..073e9ba5 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -64,3 +64,11 @@ def test_DataValidator_apply_no_matching_data(simple_df):
     )
     # no data matches validation criteria, `apply()` passes and returns unchanged object
     assert data_validator.apply(simple_df) == simple_df
+
+
+def test_DataValidator_apply_fails(simple_df):
+    data_validator = DataValidator.from_file(
+        DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
+    )
+    with pytest.raises(ValueError, match="Data validation failed"):
+        data_validator.apply(simple_df)
\ No newline at end of file

From 8d3d8223a5cdc2d43212318928144c0c67d59f0b Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Fri, 16 Aug 2024 13:27:06 +0200
Subject: [PATCH 08/19] Make black

---
 tests/test_validate_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 073e9ba5..59dfaf28 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -71,4 +71,4 @@ def test_DataValidator_apply_fails(simple_df):
         DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
     )
     with pytest.raises(ValueError, match="Data validation failed"):
-        data_validator.apply(simple_df)
\ No newline at end of file
+        data_validator.apply(simple_df)

From 68accb8749a997c31d315cf70ff7d73a74a1b2ae Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 10:13:36 +0200
Subject: [PATCH 09/19] Write failing validation for each item to log with
 criteria

---
 nomenclature/processor/data_validator.py |   7 +-
 tests/data/validation/Untitled.ipynb     | 347 +++++++++++++++++++++++
 2 files changed, 351 insertions(+), 3 deletions(-)
 create mode 100644 tests/data/validation/Untitled.ipynb

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index 3920c31d..0b96635e 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -2,7 +2,6 @@
 from pathlib import Path
 from typing import List, Union
 
-import pandas as pd
 import yaml
 from pyam import IamDataFrame
 from pyam.logging import adjust_log_level
@@ -46,13 +45,15 @@ def apply(self, df: IamDataFrame) -> IamDataFrame:
                     for direction in ["upper_bound", "lower_bound"]:
                         if getattr(item, direction) is not None:
                             failed_validation[direction] = getattr(item, direction)
-                    failed_validation_list.append(failed_validation)
+                    failed_validation_list.append(
+                        f"Criteria: {item.criteria}\n{failed_validation}\n"
+                    )
 
             if failed_validation_list:
                 logger.error(
                     "Failed data validation (file %s):\n%s",
                     get_relative_path(self.file),
-                    pd.concat(failed_validation_list),
+                    "\n".join(failed_validation_list),
                 )
                 error = True
 
diff --git a/tests/data/validation/Untitled.ipynb b/tests/data/validation/Untitled.ipynb
new file mode 100644
index 00000000..fd381df3
--- /dev/null
+++ b/tests/data/validation/Untitled.ipynb
@@ -0,0 +1,347 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e7b2b9b1-3e81-480f-823b-32e5d6e52f2a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import pyam\n",
+    "import nomenclature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "3b021ffb-743f-48c1-8bc4-447f18f0e6da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "3e640186-93b2-40e2-8a91-b094f9e053d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logger = logging.getLogger(__name__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a672ea04-cf92-4018-bf75-88c01e3de790",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nomenclature.processor.utils import get_relative_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "be399c88-0e2b-4079-85f5-6780e49d68bf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "TEST_DF = pd.DataFrame(\n",
+    "    [\n",
+    "        [\"model_a\", \"scen_a\", \"World\", \"Primary Energy\", \"EJ/yr\", 1, 6.0],\n",
+    "        [\"model_a\", \"scen_a\", \"World\", \"Primary Energy|Coal\", \"EJ/yr\", 0.5, 3],\n",
+    "        [\"model_a\", \"scen_b\", \"World\", \"Primary Energy\", \"EJ/yr\", 2, 7],\n",
+    "    ],\n",
+    "    columns=pyam.IAMC_IDX + [2005, 2010],\n",
+    ")\n",
+    "df = pyam.IamDataFrame(TEST_DF)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "35d319e2-5607-4ebd-941e-e53522f4d599",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dsd = nomenclature.DataStructureDefinition(\"definitions/\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "edc41204-b82a-4e58-a0d5-138a581c97fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "processor = nomenclature.processor.DataValidator.from_file(\n",
+    "    \"validate_data/validate_data_fails.yaml\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "0326423a-32c0-4b25-b974-a2597ca0ded5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "processor.validate_with_definition(dsd)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4cff6cf3-58f2-423a-8dba-4b84b083da6d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.set_option(\"display.width\", 180)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "80814f71-fe8e-47bb-8389-a54e7ff9041f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-08-19 10:10:33 ERROR    Failed data validation (file validate_data/validate_data_fails.yaml):\n",
+      "Criteria: {'variable': ['Primary Energy'], 'upper_bound': 5.0}\n",
+      "     model scenario region        variable   unit  year  value  upper_bound\n",
+      "0  model_a   scen_a  World  Primary Energy  EJ/yr  2010    6.0          5.0\n",
+      "1  model_a   scen_b  World  Primary Energy  EJ/yr  2010    7.0          5.0\n",
+      "\n",
+      "Criteria: {'variable': ['Primary Energy|Coal'], 'lower_bound': 2.0}\n",
+      "     model scenario region             variable   unit  year  value  lower_bound\n",
+      "0  model_a   scen_a  World  Primary Energy|Coal  EJ/yr  2005    0.5          2.0\n",
+      "\n",
+      "Criteria: {'variable': ['Primary Energy'], 'year': [2005], 'upper_bound': 1.9, 'lower_bound': 1.1}\n",
+      "     model scenario region        variable   unit  year  value  upper_bound  lower_bound\n",
+      "0  model_a   scen_a  World  Primary Energy  EJ/yr  2005    1.0          1.9          1.1\n",
+      "1  model_a   scen_b  World  Primary Energy  EJ/yr  2005    2.0          1.9          1.1\n",
+      "\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "Data validation failed. Please check the log for details.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mprocessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/GitHub/nomenclature/nomenclature/processor/data_validator.py:61\u001b[0m, in \u001b[0;36mDataValidator.apply\u001b[0;34m(self, df)\u001b[0m\n\u001b[1;32m     58\u001b[0m         error \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m     60\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m error:\n\u001b[0;32m---> 61\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m     62\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData validation failed. Please check the log for details.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     63\u001b[0m         )\n\u001b[1;32m     64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n",
+      "\u001b[0;31mValueError\u001b[0m: Data validation failed. Please check the log for details."
+     ]
+    }
+   ],
+   "source": [
+    "processor.apply(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d910ec0-44a6-47da-9e2d-d4d5390d40b5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "00cee2ef-c1c1-46e9-9e04-5e662e827904",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac27a3ed-9e85-4426-92ff-9b6eeccc79aa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4482a0e-2743-4b73-9782-16648189c4e1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b8047fa-58a9-4fef-a670-ad4f7f3134a5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x.dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba8368e4-c66b-47ca-bd2d-4dcb79d3dd24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y = df.validate(year=2010, upper_bound=2.5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3cac9da3-6151-44c4-82b3-21209e983c6e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y[\"upper_bound\"] = 2.5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2cdb1852-86ec-40c9-ab5b-396155481290",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c96c95a7-7d76-445b-bcf2-1311918f4669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.validate(**x.dict())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d36ceae3-470b-43de-85e8-57a3e2348802",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = processor.criteria_items[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1fddc65-2f9d-4bf6-9b30-ad8d03251cae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dict(item for item in x.model_dump().items() if item[1] is not None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9c67a4a-ad77-4c3f-a916-8b0b473086d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x.get_filters()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "54d69a12-ce0e-4976-ba92-42d41b939f29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "failed_validation_data = df.validate(**item.dict())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6fb4fc3-b4f9-48bb-b105-9d64244513fd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "?pyam.logging.adjust_log_level"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1f37a28-4a48-4570-afaf-8b83e0979bd2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "item.dict()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90987d27-347a-4f37-a4b9-48634a7f11b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "self = processor\n",
+    "\n",
+    "error = False\n",
+    "\n",
+    "for item in self.criteria_items:\n",
+    "    with pyam.logging.adjust_log_level():\n",
+    "        failed_validation_data = df.validate(**item.dict())\n",
+    "    if not failed_validation_data.empty:\n",
+    "        logger.error(\n",
+    "            \"Failed data validation.\\nFile: %s\\n\\n%s\",\n",
+    "            get_relative_path(self.file),\n",
+    "            failed_validation_data,\n",
+    "        )\n",
+    "        error = True\n",
+    "\n",
+    "if error:\n",
+    "    raise ValueError(\"Failed data validation. Please check the log for details.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4b7b493-8315-47f7-9965-8ca9731f911e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 710497bdfc46e7576a5417c2bfdcfc425e939ad7 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 10:38:44 +0200
Subject: [PATCH 10/19] Don't add upper/lower bound columns explicitly

---
 nomenclature/processor/data_validator.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index 0b96635e..cfe0b733 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -42,9 +42,6 @@ def apply(self, df: IamDataFrame) -> IamDataFrame:
             for item in self.criteria_items:
                 failed_validation = df.validate(**item.criteria)
                 if failed_validation is not None:
-                    for direction in ["upper_bound", "lower_bound"]:
-                        if getattr(item, direction) is not None:
-                            failed_validation[direction] = getattr(item, direction)
                     failed_validation_list.append(
                         f"Criteria: {item.criteria}\n{failed_validation}\n"
                     )

From b3288b98a8cb263fed1b17d1dd500616b1ff73d5 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 11:16:20 +0200
Subject: [PATCH 11/19] Make more concise log error messages

---
 nomenclature/processor/data_validator.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index cfe0b733..a1096962 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -1,5 +1,6 @@
 import logging
 from pathlib import Path
+import textwrap
 from typing import List, Union
 
 import yaml
@@ -35,26 +36,28 @@ def from_file(cls, file: Union[Path, str]) -> "DataValidator":
         return cls(file=file, criteria_items=content)
 
     def apply(self, df: IamDataFrame) -> IamDataFrame:
-        failed_validation_list = []
-        error = False
+        error_list = []
 
         with adjust_log_level():
             for item in self.criteria_items:
                 failed_validation = df.validate(**item.criteria)
                 if failed_validation is not None:
-                    failed_validation_list.append(
-                        f"Criteria: {item.criteria}\n{failed_validation}\n"
+                    error_list.append(
+                        "  Criteria: "
+                        + ", ".join(
+                            [f"{key}: {value}" for key, value in item.criteria.items()]
+                        )
+                    )
+                    error_list.append(
+                        textwrap.indent(str(failed_validation), prefix="    ") + "\n"
                     )
 
-            if failed_validation_list:
+            if error_list:
                 logger.error(
                     "Failed data validation (file %s):\n%s",
                     get_relative_path(self.file),
-                    "\n".join(failed_validation_list),
+                    "\n".join(error_list),
                 )
-                error = True
-
-            if error:
                 raise ValueError(
                     "Data validation failed. Please check the log for details."
                 )

From b190f96d20a1bbb4dd479e73a540ab7c88a8bb5a Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 11:16:32 +0200
Subject: [PATCH 12/19] Add a test

---
 tests/test_validate_data.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 59dfaf28..e07bb1b7 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -66,9 +66,31 @@ def test_DataValidator_apply_no_matching_data(simple_df):
     assert data_validator.apply(simple_df) == simple_df
 
 
-def test_DataValidator_apply_fails(simple_df):
+def test_DataValidator_apply_fails(simple_df, caplog):
     data_validator = DataValidator.from_file(
         DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
     )
+
+    failed_validation_message = [
+        "Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):"
+        """
+  Criteria: variable: ['Primary Energy'], upper_bound: 5.0
+         model scenario region        variable   unit  year  value
+    0  model_a   scen_a  World  Primary Energy  EJ/yr  2010    6.0
+    1  model_a   scen_b  World  Primary Energy  EJ/yr  2010    7.0
+
+  Criteria: variable: ['Primary Energy|Coal'], lower_bound: 2.0
+         model scenario region             variable   unit  year  value
+    0  model_a   scen_a  World  Primary Energy|Coal  EJ/yr  2005    0.5
+
+  Criteria: variable: ['Primary Energy'], year: [2005], upper_bound: 1.9, lower_bound: 1.1
+         model scenario region        variable   unit  year  value
+    0  model_a   scen_a  World  Primary Energy  EJ/yr  2005    1.0
+    1  model_a   scen_b  World  Primary Energy  EJ/yr  2005    2.0""",
+    ]
+
     with pytest.raises(ValueError, match="Data validation failed"):
         data_validator.apply(simple_df)
+
+    # check if the log message contains the correct information
+    assert all(x in caplog.text for x in failed_validation_message)

From 856a412d3a5d9615689d8c266589893f6ac5a4a9 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 11:34:49 +0200
Subject: [PATCH 13/19] Fix failing test

---
 tests/test_validate_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index e07bb1b7..5bd332d0 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -72,7 +72,7 @@ def test_DataValidator_apply_fails(simple_df, caplog):
     )
 
     failed_validation_message = [
-        "Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):"
+        "Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):",
         """
   Criteria: variable: ['Primary Energy'], upper_bound: 5.0
          model scenario region        variable   unit  year  value

From 766787e642efe8d62245d7582c7460889cd61720 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 11:46:35 +0200
Subject: [PATCH 14/19] Simplify test to one assertion

---
 tests/test_validate_data.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 5bd332d0..fc0beced 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -71,9 +71,8 @@ def test_DataValidator_apply_fails(simple_df, caplog):
         DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
     )
 
-    failed_validation_message = [
-        "Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):",
-        """
+    failed_validation_message = \
+        """Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):
   Criteria: variable: ['Primary Energy'], upper_bound: 5.0
          model scenario region        variable   unit  year  value
     0  model_a   scen_a  World  Primary Energy  EJ/yr  2010    6.0
@@ -86,11 +85,10 @@ def test_DataValidator_apply_fails(simple_df, caplog):
   Criteria: variable: ['Primary Energy'], year: [2005], upper_bound: 1.9, lower_bound: 1.1
          model scenario region        variable   unit  year  value
     0  model_a   scen_a  World  Primary Energy  EJ/yr  2005    1.0
-    1  model_a   scen_b  World  Primary Energy  EJ/yr  2005    2.0""",
-    ]
+    1  model_a   scen_b  World  Primary Energy  EJ/yr  2005    2.0"""
 
     with pytest.raises(ValueError, match="Data validation failed"):
         data_validator.apply(simple_df)
 
     # check if the log message contains the correct information
-    assert all(x in caplog.text for x in failed_validation_message)
+    assert failed_validation_message in caplog.text

From 37f689af9c0609e88fa73a6f23f5352d3ebec6cb Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 11:57:04 +0200
Subject: [PATCH 15/19] Check if console-width is causing the problems

---
 tests/test_validate_data.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index fc0beced..638c7211 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -1,3 +1,4 @@
+import pandas as pd
 import pytest
 from conftest import TEST_DATA_DIR
 
@@ -71,8 +72,10 @@ def test_DataValidator_apply_fails(simple_df, caplog):
         DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
     )
 
-    failed_validation_message = \
-        """Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):
+    # TODO implement a utility function to display pandas nicely
+    pd.set_option("display.width", 180)
+
+    failed_validation_message = """Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):
   Criteria: variable: ['Primary Energy'], upper_bound: 5.0
          model scenario region        variable   unit  year  value
     0  model_a   scen_a  World  Primary Energy  EJ/yr  2010    6.0

From a61b7c07a7e1e101caeacf6054a83719fb12afda Mon Sep 17 00:00:00 2001
From: Philip Hackstock <20710924+phackstock@users.noreply.github.com>
Date: Mon, 19 Aug 2024 13:03:22 +0200
Subject: [PATCH 16/19] Fix validate data path

---
 tests/test_validate_data.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 638c7211..51e47f5b 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import pandas as pd
 import pytest
 from conftest import TEST_DATA_DIR
@@ -68,14 +70,13 @@ def test_DataValidator_apply_no_matching_data(simple_df):
 
 
 def test_DataValidator_apply_fails(simple_df, caplog):
-    data_validator = DataValidator.from_file(
-        DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
-    )
+    data_file = DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
+    data_validator = DataValidator.from_file(data_file)
 
     # TODO implement a utility function to display pandas nicely
     pd.set_option("display.width", 180)
 
-    failed_validation_message = """Failed data validation (file data/validation/validate_data/validate_data_fails.yaml):
+    failed_validation_message = f"""Failed data validation (file {data_file.relative_to(Path.cwd())}):
   Criteria: variable: ['Primary Energy'], upper_bound: 5.0
          model scenario region        variable   unit  year  value
     0  model_a   scen_a  World  Primary Energy  EJ/yr  2010    6.0

From 7868fd529abf70a775c5ded1a033b2f9842adb55 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 13:28:54 +0200
Subject: [PATCH 17/19] Remove dev notebook

---
 tests/data/validation/Untitled.ipynb | 347 ---------------------------
 1 file changed, 347 deletions(-)
 delete mode 100644 tests/data/validation/Untitled.ipynb

diff --git a/tests/data/validation/Untitled.ipynb b/tests/data/validation/Untitled.ipynb
deleted file mode 100644
index fd381df3..00000000
--- a/tests/data/validation/Untitled.ipynb
+++ /dev/null
@@ -1,347 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "e7b2b9b1-3e81-480f-823b-32e5d6e52f2a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import pyam\n",
-    "import nomenclature"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "3b021ffb-743f-48c1-8bc4-447f18f0e6da",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import logging"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "3e640186-93b2-40e2-8a91-b094f9e053d2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "logger = logging.getLogger(__name__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "a672ea04-cf92-4018-bf75-88c01e3de790",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from nomenclature.processor.utils import get_relative_path"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "be399c88-0e2b-4079-85f5-6780e49d68bf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TEST_DF = pd.DataFrame(\n",
-    "    [\n",
-    "        [\"model_a\", \"scen_a\", \"World\", \"Primary Energy\", \"EJ/yr\", 1, 6.0],\n",
-    "        [\"model_a\", \"scen_a\", \"World\", \"Primary Energy|Coal\", \"EJ/yr\", 0.5, 3],\n",
-    "        [\"model_a\", \"scen_b\", \"World\", \"Primary Energy\", \"EJ/yr\", 2, 7],\n",
-    "    ],\n",
-    "    columns=pyam.IAMC_IDX + [2005, 2010],\n",
-    ")\n",
-    "df = pyam.IamDataFrame(TEST_DF)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "35d319e2-5607-4ebd-941e-e53522f4d599",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dsd = nomenclature.DataStructureDefinition(\"definitions/\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "edc41204-b82a-4e58-a0d5-138a581c97fb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "processor = nomenclature.processor.DataValidator.from_file(\n",
-    "    \"validate_data/validate_data_fails.yaml\"\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "0326423a-32c0-4b25-b974-a2597ca0ded5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "processor.validate_with_definition(dsd)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "4cff6cf3-58f2-423a-8dba-4b84b083da6d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pd.set_option(\"display.width\", 180)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "80814f71-fe8e-47bb-8389-a54e7ff9041f",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-08-19 10:10:33 ERROR    Failed data validation (file validate_data/validate_data_fails.yaml):\n",
-      "Criteria: {'variable': ['Primary Energy'], 'upper_bound': 5.0}\n",
-      "     model scenario region        variable   unit  year  value  upper_bound\n",
-      "0  model_a   scen_a  World  Primary Energy  EJ/yr  2010    6.0          5.0\n",
-      "1  model_a   scen_b  World  Primary Energy  EJ/yr  2010    7.0          5.0\n",
-      "\n",
-      "Criteria: {'variable': ['Primary Energy|Coal'], 'lower_bound': 2.0}\n",
-      "     model scenario region             variable   unit  year  value  lower_bound\n",
-      "0  model_a   scen_a  World  Primary Energy|Coal  EJ/yr  2005    0.5          2.0\n",
-      "\n",
-      "Criteria: {'variable': ['Primary Energy'], 'year': [2005], 'upper_bound': 1.9, 'lower_bound': 1.1}\n",
-      "     model scenario region        variable   unit  year  value  upper_bound  lower_bound\n",
-      "0  model_a   scen_a  World  Primary Energy  EJ/yr  2005    1.0          1.9          1.1\n",
-      "1  model_a   scen_b  World  Primary Energy  EJ/yr  2005    2.0          1.9          1.1\n",
-      "\n"
-     ]
-    },
-    {
-     "ename": "ValueError",
-     "evalue": "Data validation failed. Please check the log for details.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mprocessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/GitHub/nomenclature/nomenclature/processor/data_validator.py:61\u001b[0m, in \u001b[0;36mDataValidator.apply\u001b[0;34m(self, df)\u001b[0m\n\u001b[1;32m     58\u001b[0m         error \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m     60\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m error:\n\u001b[0;32m---> 61\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m     62\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mData validation failed. Please check the log for details.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     63\u001b[0m         )\n\u001b[1;32m     64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df\n",
-      "\u001b[0;31mValueError\u001b[0m: Data validation failed. Please check the log for details."
-     ]
-    }
-   ],
-   "source": [
-    "processor.apply(df)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2d910ec0-44a6-47da-9e2d-d4d5390d40b5",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00cee2ef-c1c1-46e9-9e04-5e662e827904",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ac27a3ed-9e85-4426-92ff-9b6eeccc79aa",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b4482a0e-2743-4b73-9782-16648189c4e1",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8b8047fa-58a9-4fef-a670-ad4f7f3134a5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x.dict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ba8368e4-c66b-47ca-bd2d-4dcb79d3dd24",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y = df.validate(year=2010, upper_bound=2.5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3cac9da3-6151-44c4-82b3-21209e983c6e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y[\"upper_bound\"] = 2.5"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2cdb1852-86ec-40c9-ab5b-396155481290",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c96c95a7-7d76-445b-bcf2-1311918f4669",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.validate(**x.dict())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d36ceae3-470b-43de-85e8-57a3e2348802",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x = processor.criteria_items[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e1fddc65-2f9d-4bf6-9b30-ad8d03251cae",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dict(item for item in x.model_dump().items() if item[1] is not None)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b9c67a4a-ad77-4c3f-a916-8b0b473086d5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "x.get_filters()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "54d69a12-ce0e-4976-ba92-42d41b939f29",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "failed_validation_data = df.validate(**item.dict())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a6fb4fc3-b4f9-48bb-b105-9d64244513fd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "?pyam.logging.adjust_log_level"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c1f37a28-4a48-4570-afaf-8b83e0979bd2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "item.dict()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "90987d27-347a-4f37-a4b9-48634a7f11b2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "self = processor\n",
-    "\n",
-    "error = False\n",
-    "\n",
-    "for item in self.criteria_items:\n",
-    "    with pyam.logging.adjust_log_level():\n",
-    "        failed_validation_data = df.validate(**item.dict())\n",
-    "    if not failed_validation_data.empty:\n",
-    "        logger.error(\n",
-    "            \"Failed data validation.\\nFile: %s\\n\\n%s\",\n",
-    "            get_relative_path(self.file),\n",
-    "            failed_validation_data,\n",
-    "        )\n",
-    "        error = True\n",
-    "\n",
-    "if error:\n",
-    "    raise ValueError(\"Failed data validation. Please check the log for details.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d4b7b493-8315-47f7-9965-8ca9731f911e",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 0f19b56353bc8fcda010c4d99d7f49b038da3bad Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 13:32:58 +0200
Subject: [PATCH 18/19] Implement review suggestion by @phackstock

---
 nomenclature/processor/data_validator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index a1096962..8afab90d 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -65,9 +65,9 @@ def apply(self, df: IamDataFrame) -> IamDataFrame:
 
     def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
         errors = ErrorCollector(description=f"in file '{self.file}'")
-        for item in self.criteria_items:
+        for criterion in self.criteria_items:
             try:
-                item.validate_with_definition(dsd)
+                criterion.validate_with_definition(dsd)
             except ValueError as value_error:
                 errors.append(value_error)
         if errors:

From 9ae85dc5ca48464ed30cbaa34184239209e05827 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Mon, 19 Aug 2024 13:59:33 +0200
Subject: [PATCH 19/19] Remove unnecessary todo

---
 tests/test_validate_data.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 51e47f5b..2e451a78 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -1,6 +1,5 @@
 from pathlib import Path
 
-import pandas as pd
 import pytest
 from conftest import TEST_DATA_DIR
 
@@ -73,9 +72,6 @@ def test_DataValidator_apply_fails(simple_df, caplog):
     data_file = DATA_VALIDATION_TEST_DIR / "validate_data_fails.yaml"
     data_validator = DataValidator.from_file(data_file)
 
-    # TODO implement a utility function to display pandas nicely
-    pd.set_option("display.width", 180)
-
     failed_validation_message = f"""Failed data validation (file {data_file.relative_to(Path.cwd())}):
   Criteria: variable: ['Primary Energy'], upper_bound: 5.0
          model scenario region        variable   unit  year  value