
Commit

Address review feedback
sahusiddharth committed Jan 28, 2025
1 parent bf0ec16 commit 17055ce
Showing 5 changed files with 53 additions and 79 deletions.
10 changes: 5 additions & 5 deletions integrations/ragas/example/evaluation_from_pipeline_example.py
@@ -1,19 +1,19 @@
# A valid OpenAI API key must be provided as an environment variable "OPENAI_API_KEY" to run this example.

import os
from dotenv import load_dotenv
from getpass import getpass

load_dotenv()
from haystack import Document
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")

from haystack import Document, Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders import AnswerBuilder
from haystack import Pipeline
from haystack_integrations.components.evaluators.ragas import RagasEvaluator

from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
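The visible part of this change replaces the example's python-dotenv setup with an interactive fallback for the API key. As a standalone illustration of the resulting pattern (only what the hunk above already shows), a minimal sketch:

```python
import os
from getpass import getpass

# Use an OPENAI_API_KEY that is already set in the environment; otherwise fall
# back to an interactive prompt so the example can still run.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
```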
10 changes: 6 additions & 4 deletions integrations/ragas/example/evaluation_with_components_example.py
@@ -1,18 +1,20 @@
# A valid OpenAI API key must be provided as an environment variable "OPENAI_API_KEY" to run this example.

import os
from dotenv import load_dotenv
from getpass import getpass

load_dotenv()
from haystack import Document
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")


from haystack import Document, Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders import AnswerBuilder
from haystack import Pipeline
from haystack_integrations.components.evaluators.ragas import RagasEvaluator

from langchain_openai import ChatOpenAI
1 change: 0 additions & 1 deletion integrations/ragas/pydoc/config.yml
@@ -12,7 +12,6 @@ processors:
    documented_only: true
    do_not_filter_modules: false
    skip_empty_modules: true
  - type: filter
  - type: smart
  - type: crossref
renderer:
26 changes: 1 addition & 25 deletions integrations/ragas/src/haystack_integrations/components/evaluators/ragas/evaluator.py
@@ -1,7 +1,7 @@
import re
from typing import Any, Dict, List, Optional, Union, get_args, get_origin

from haystack import Document, component, default_from_dict, default_to_dict
from haystack import Document, component
from haystack.dataclasses import ChatMessage
from langchain_core.embeddings import Embeddings as LangchainEmbeddings # type: ignore
from langchain_core.language_models import BaseLanguageModel as LangchainLLM # type: ignore
@@ -273,27 +273,3 @@ def _get_example_input(self, field: str) -> str:
            "rubrics": "{'score1': 'high_similarity'}",
        }
        return examples.get(field, "An appropriate value based on the field's type")

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns: Dictionary with serialized data.
        :raises DeserializationError: If the component cannot be serialized.
        """
        return default_to_dict(
            self,
            ragas_metrics=self.metrics,
            evaluator_llm=self.llm,
            evaluator_embedding=self.embedding,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RagasEvaluator":
        """
        Deserializes the component from a dictionary.

        :param data: Dictionary to deserialize from.
        :returns: Deserialized component.
        """
        return default_from_dict(cls, data)
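With the explicit to_dict/from_dict overrides removed above, the component's constructor is unchanged. A minimal usage sketch based on the example scripts touched by this commit; the metric choice and model name are illustrative assumptions, and an OPENAI_API_KEY must be set in the environment:

```python
from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness

from haystack_integrations.components.evaluators.ragas import RagasEvaluator

# Wrap a LangChain chat model so Ragas can use it as the evaluator LLM
# (the model name here is an assumption for the sketch).
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))

evaluator = RagasEvaluator(
    ragas_metrics=[Faithfulness()],
    evaluator_llm=evaluator_llm,
)
```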
85 changes: 41 additions & 44 deletions integrations/ragas/tests/test_evaluator.py
@@ -6,31 +6,36 @@
from ragas.embeddings import BaseRagasEmbeddings
from ragas.dataset_schema import EvaluationResult
from haystack import Document
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.evaluators.ragas import RagasEvaluator


# Fixture to mock the 'run' method of RagasEvaluator
# Fixtures
@pytest.fixture
def mock_run():
    """Fixture to mock the 'run' method of RagasEvaluator."""
    with mock.patch.object(RagasEvaluator, 'run') as mock_method:
        yield mock_method


def test_successful_initialization():
    """Test RagasEvaluator initializes correctly with valid inputs."""
@pytest.fixture
def ragas_evaluator():
    """Fixture to create a valid RagasEvaluator instance."""
    valid_metrics = [MagicMock(spec=Metric) for _ in range(3)]
    valid_llm = MagicMock(spec=BaseRagasLLM)
    valid_embedding = MagicMock(spec=BaseRagasEmbeddings)

    evaluator = RagasEvaluator(
    return RagasEvaluator(
        ragas_metrics=valid_metrics,
        evaluator_llm=valid_llm,
        evaluator_embedding=valid_embedding,
    )
    assert evaluator.metrics == valid_metrics
    assert evaluator.llm == valid_llm
    assert evaluator.embedding == valid_embedding


# Tests
def test_successful_initialization(ragas_evaluator):
    """Test RagasEvaluator initializes correctly with valid inputs."""
    assert len(ragas_evaluator.metrics) == 3
    assert isinstance(ragas_evaluator.llm, BaseRagasLLM)
    assert isinstance(ragas_evaluator.embedding, BaseRagasEmbeddings)


def test_invalid_metrics():
@@ -61,7 +66,7 @@ def test_invalid_embedding():
        RagasEvaluator(ragas_metrics=[valid_metric], evaluator_embedding=invalid_embedding)


def test_optional_llm_and_embeddings():
def test_initializer_allows_optional_llm_and_embeddings():
    """Test RagasEvaluator initializes correctly with None for optional parameters."""
    valid_metric = MagicMock(spec=Metric)

@@ -75,55 +80,46 @@ def test_optional_llm_and_embeddings():
    assert evaluator.embedding is None


def test_missing_columns_in_dataset():
    """Test if RagasEvaluator raises a ValueError when required columns are missing for a specific metric."""
@pytest.mark.parametrize(
    "invalid_input,field_name,error_message",
    [
        (["Invalid query type"], "query", "'query' field expected"),
        ([123, ["Invalid document"]], "documents", "Unsupported type in documents list"),
        (["score_1"], "rubrics", "'rubrics' field expected"),
    ],
)
def test_run_invalid_inputs(invalid_input, field_name, error_message):
    """Test RagasEvaluator raises ValueError for invalid input types."""
    evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
    query = "Which is the most popular global sport?"
    reference = "Football is the most popular sport with around 4 billion followers worldwide"
    documents = ["Football is the most popular sport."]
    response = "Football is the most popular sport in the world"

    with pytest.raises(ValueError) as exc_info:
        evaluator.run(query=query, reference=reference, response=response)

    expected_error_message = "The metric [faithfulness] that is used requires the following additional columns ['documents'] to be present in the dataset."
    assert expected_error_message == str(exc_info.value)

        if field_name == "query":
            evaluator.run(query=invalid_input, documents=documents, response=response)
        elif field_name == "documents":
            evaluator.run(query=query, documents=invalid_input, response=response)
        elif field_name == "rubrics":
            evaluator.run(query=query, rubrics=invalid_input, documents=documents, response=response)

def test_run_invalid_query_type():
    """Test RagasEvaluator raises ValueError for invalid query type."""
    evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
    query = ["Invalid query type"]  # Should be str
    documents = ["Football is the most popular sport."]
    reference = ChatMessage(_content="Football is the most popular sport.", _role="human")
    response = "Football is the most popular sport in the world"

    with pytest.raises(ValueError, match="The 'query' field expected .* but got 'list'"):
        evaluator.run(query=query, documents=documents, reference=reference, response=response)
    assert error_message in str(exc_info.value)


def test_run_invalid_rubrics_type():
    """Test RagasEvaluator raises ValueError for invalid rubrics type."""
def test_missing_columns_in_dataset():
    """Test if RagasEvaluator raises a ValueError when required columns are missing for a specific metric."""
    evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
    query = "Which is the most popular global sport?"
    reference = "Football is the most popular sport with around 4 billion followers worldwide"
    response = "Football is the most popular sport in the world"
    documents = ["Football is the most popular sport."]
    rubrics = ["score_1"]  # Should be dict

    with pytest.raises(ValueError, match="The 'rubrics' field expected 'one of Dict, NoneType', but got 'list'."):
        evaluator.run(query=query, rubrics=rubrics, response=response, documents=documents)

    with pytest.raises(ValueError) as exc_info:
        evaluator.run(query=query, reference=reference, response=response)

def test_run_invalid_documents_type():
    """Test RagasEvaluator raises ValueError for invalid document types."""
    evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])
    query = "Which is the most popular global sport?"
    documents = [123, ["Invalid document"]]  # Invalid types

    with pytest.raises(ValueError, match="Unsupported type in documents list."):
        evaluator.run(query=query, documents=documents)
    assert "faithfulness" in str(exc_info.value)
    assert "documents" in str(exc_info.value)


@patch.object(RagasEvaluator, 'run')
def test_run_valid_input(mock_run):
    """Test RagasEvaluator runs successfully with valid input."""
    mock_run.return_value = {"result": {"score": MagicMock(), "details": MagicMock(spec=EvaluationResult)}}
@@ -153,3 +149,4 @@ def test_run_valid_input(mock_run):
    assert "result" in output
    assert isinstance(output["result"], dict)
    assert "score" in output["result"]
    assert isinstance(output["result"]["details"], EvaluationResult)
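The parametrized test above folds the three former invalid-input tests into one. The same validation can also be exercised directly outside pytest; a small sketch assuming the error behaviour these tests assert:

```python
from ragas.metrics import Faithfulness

from haystack_integrations.components.evaluators.ragas import RagasEvaluator

evaluator = RagasEvaluator(ragas_metrics=[Faithfulness()])

try:
    # 'query' must be a string; passing a list should trip the input validation.
    evaluator.run(
        query=["Invalid query type"],
        documents=["Football is the most popular sport."],
        response="Football is the most popular sport in the world",
    )
except ValueError as err:
    print(err)  # expected to say the 'query' field got a 'list'
```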
