diff --git a/integrations/ragas/example/evaluation_from_pipeline_example.py b/integrations/ragas/example/evaluation_from_pipeline_example.py
index 7aad28359..5cb76b57f 100644
--- a/integrations/ragas/example/evaluation_from_pipeline_example.py
+++ b/integrations/ragas/example/evaluation_from_pipeline_example.py
@@ -1,10 +1,12 @@
 # A valid OpenAI API key must be provided as an environment variable "OPENAI_API_KEY" to run this example.
 import os
 
-from dotenv import load_dotenv
+from getpass import getpass
 
-load_dotenv()
-from haystack import Document
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
+
+from haystack import Document, Pipeline
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
 from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
@@ -12,8 +14,6 @@ from haystack.dataclasses import ChatMessage
 from haystack.components.generators.chat import OpenAIChatGenerator
 from haystack.components.builders import AnswerBuilder
 
-from haystack import Pipeline
-from haystack_integrations.components.evaluators.ragas import RagasEvaluator
 
 from langchain_openai import ChatOpenAI
 from ragas.llms import LangchainLLMWrapper
diff --git a/integrations/ragas/example/evaluation_with_components_example.py b/integrations/ragas/example/evaluation_with_components_example.py
index 4ee4c0a1d..c01f8a9ac 100644
--- a/integrations/ragas/example/evaluation_with_components_example.py
+++ b/integrations/ragas/example/evaluation_with_components_example.py
@@ -1,10 +1,13 @@
 # A valid OpenAI API key must be provided as an environment variable "OPENAI_API_KEY" to run this example.
 import os
 
-from dotenv import load_dotenv
+from getpass import getpass
 
-load_dotenv()
-from haystack import Document
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
+
+
+from haystack import Document, Pipeline
 from haystack.document_stores.in_memory import InMemoryDocumentStore
 from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
 from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
@@ -12,7 +15,6 @@ from haystack.dataclasses import ChatMessage
 from haystack.components.generators.chat import OpenAIChatGenerator
 from haystack.components.builders import AnswerBuilder
 
-from haystack import Pipeline
 from haystack_integrations.components.evaluators.ragas import RagasEvaluator
 
 from langchain_openai import ChatOpenAI
diff --git a/integrations/ragas/pydoc/config.yml b/integrations/ragas/pydoc/config.yml
index 9a94e2293..033bc738c 100644
--- a/integrations/ragas/pydoc/config.yml
+++ b/integrations/ragas/pydoc/config.yml
@@ -12,7 +12,6 @@ processors:
     documented_only: true
     do_not_filter_modules: false
     skip_empty_modules: true
-  - type: filter
   - type: smart
   - type: crossref
 renderer:
diff --git a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
index 934bb8fa7..1091cb902 100644
--- a/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
+++ b/integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
@@ -1,7 +1,7 @@
 import re
 from typing import Any, Dict, List, Optional, Union, get_args, get_origin
 
-from haystack import Document, component, default_from_dict, default_to_dict
+from haystack import Document, component
 from haystack.dataclasses import ChatMessage
 from langchain_core.embeddings import Embeddings as LangchainEmbeddings  # type: ignore
 from langchain_core.language_models import BaseLanguageModel as LangchainLLM  # type: ignore
@@ -273,27 +273,3 @@ def _get_example_input(self, field: str) -> str:
             "rubrics": "{'score1': 'high_similarity'}",
         }
         return examples.get(field, "An appropriate value based on the field's type")
-
-    def to_dict(self) -> Dict[str, Any]:
-        """
-        Serializes the component to a dictionary.
-
-        :returns: Dictionary with serialized data.
-        :raises DeserializationError: If the component cannot be serialized.
-        """
-        return default_to_dict(
-            self,
-            ragas_metrics=self.metrics,
-            evaluator_llm=self.llm,
-            evaluator_embedding=self.embedding,
-        )
-
-    @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "RagasEvaluator":
-        """
-        Deserializes the component from a dictionary.
-
-        :param data: Dictionary to deserialize from.
-        :returns: Deserialized component.
-        """
-        return default_from_dict(cls, data)
diff --git a/integrations/ragas/tests/test_evaluator.py b/integrations/ragas/tests/test_evaluator.py
index 2b1ada27d..f546d9e1e 100644
--- a/integrations/ragas/tests/test_evaluator.py
+++ b/integrations/ragas/tests/test_evaluator.py
@@ -6,31 +6,36 @@
 from ragas.embeddings import BaseRagasEmbeddings
 from ragas.dataset_schema import EvaluationResult
 from haystack import Document
-from haystack.dataclasses import ChatMessage
 from haystack_integrations.components.evaluators.ragas import RagasEvaluator
 
 
-# Fixture to mock the 'run' method of RagasEvaluator
+# Fixtures
 @pytest.fixture
 def mock_run():
+    """Fixture to mock the 'run' method of RagasEvaluator."""
     with mock.patch.object(RagasEvaluator, 'run') as mock_method:
         yield mock_method
 
 
-def test_successful_initialization():
-    """Test RagasEvaluator initializes correctly with valid inputs."""
+@pytest.fixture
+def ragas_evaluator():
+    """Fixture to create a valid RagasEvaluator instance."""
     valid_metrics = [MagicMock(spec=Metric) for _ in range(3)]
     valid_llm = MagicMock(spec=BaseRagasLLM)
     valid_embedding = MagicMock(spec=BaseRagasEmbeddings)
-
-    evaluator = RagasEvaluator(
+    return RagasEvaluator(
        ragas_metrics=valid_metrics,
        evaluator_llm=valid_llm,
        evaluator_embedding=valid_embedding,
     )
-    assert evaluator.metrics == valid_metrics
-    assert evaluator.llm == valid_llm
-    assert evaluator.embedding == valid_embedding
+
+
+# Tests
+def test_successful_initialization(ragas_evaluator):
+    """Test RagasEvaluator initializes correctly with valid inputs."""
+    assert len(ragas_evaluator.metrics) == 3
+    assert isinstance(ragas_evaluator.llm, BaseRagasLLM)
+    assert isinstance(ragas_evaluator.embedding, BaseRagasEmbeddings)
 
 
 def test_invalid_metrics():
@@ -61,7 +66,7 @@ def test_invalid_embedding():
        RagasEvaluator(ragas_metrics=[valid_metric], evaluator_embedding=invalid_embedding)
 
 
-def test_optional_llm_and_embeddings():
+def test_initializer_allows_optional_llm_and_embeddings():
     """Test RagasEvaluator initializes correctly with None for optional parameters."""
     valid_metric = MagicMock(spec=Metric)
 
@@ -75,55 +80,46 @@
     assert evaluator.embedding is None
 
 
-def test_missing_columns_in_dataset():
-    """Test if RagasEvaluator raises a ValueError when required columns are missing for a specific metric."""
+@pytest.mark.parametrize(
+    "invalid_input,field_name,error_message",
+    [
+        (["Invalid query type"], "query", "'query' field expected"),
+        ([123, ["Invalid document"]], "documents", "Unsupported type in documents list"),
+        (["score_1"], "rubrics", "'rubrics' field expected"),
+    ],
+)
+def test_run_invalid_inputs(invalid_input, field_name, error_message):
+    """Test RagasEvaluator raises ValueError for invalid input types."""
     evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
     query = "Which is the most popular global sport?"
-    reference = "Football is the most popular sport with around 4 billion followers worldwide"
+    documents = ["Football is the most popular sport."]
     response = "Football is the most popular sport in the world"
 
     with pytest.raises(ValueError) as exc_info:
-        evaluator.run(query=query, reference=reference, response=response)
-
-    expected_error_message = "The metric [faithfulness] that is used requires the following additional columns ['documents'] to be present in the dataset."
-    assert expected_error_message == str(exc_info.value)
-
+        if field_name == "query":
+            evaluator.run(query=invalid_input, documents=documents, response=response)
+        elif field_name == "documents":
+            evaluator.run(query=query, documents=invalid_input, response=response)
+        elif field_name == "rubrics":
+            evaluator.run(query=query, rubrics=invalid_input, documents=documents, response=response)
 
-def test_run_invalid_query_type():
-    """Test RagasEvaluator raises ValueError for invalid query type."""
-    evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
-    query = ["Invalid query type"]  # Should be str
-    documents = ["Football is the most popular sport."]
-    reference = ChatMessage(_content="Football is the most popular sport.", _role="human")
-    response = "Football is the most popular sport in the world"
-
-    with pytest.raises(ValueError, match="The 'query' field expected .* but got 'list'"):
-        evaluator.run(query=query, documents=documents, reference=reference, response=response)
+    assert error_message in str(exc_info.value)
 
 
-def test_run_invalid_rubrics_type():
-    """Test RagasEvaluator raises ValueError for invalid rubrics type."""
+def test_missing_columns_in_dataset():
+    """Test if RagasEvaluator raises a ValueError when required columns are missing for a specific metric."""
     evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
     query = "Which is the most popular global sport?"
+    reference = "Football is the most popular sport with around 4 billion followers worldwide"
     response = "Football is the most popular sport in the world"
-    documents = ["Football is the most popular sport."]
-    rubrics = ["score_1"]  # Should be dict
-
-    with pytest.raises(ValueError, match="The 'rubrics' field expected 'one of Dict, NoneType', but got 'list'."):
-        evaluator.run(query=query, rubrics=rubrics, response=response, documents=documents)
+
+    with pytest.raises(ValueError) as exc_info:
+        evaluator.run(query=query, reference=reference, response=response)
 
-
-def test_run_invalid_documents_type():
-    """Test RagasEvaluator raises ValueError for invalid document types."""
-    evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
-    query = "Which is the most popular global sport?"
-    documents = [123, ["Invalid document"]]  # Invalid types
-
-    with pytest.raises(ValueError, match="Unsupported type in documents list."):
-        evaluator.run(query=query, documents=documents)
+    assert "faithfulness" in str(exc_info.value)
+    assert "documents" in str(exc_info.value)
 
 
-@patch.object(RagasEvaluator, 'run')
 def test_run_valid_input(mock_run):
     """Test RagasEvaluator runs successfully with valid input."""
     mock_run.return_value = {"result": {"score": MagicMock(), "details": MagicMock(spec=EvaluationResult)}}
@@ -153,3 +149,4 @@ def test_run_valid_input(mock_run):
     assert "result" in output
     assert isinstance(output["result"], dict)
     assert "score" in output["result"]
+    assert isinstance(output["result"]["details"], EvaluationResult)