diff --git a/README.md b/README.md index 024f914..be1f563 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -# langval +# LangEval -langval is a language model evaluation tool for evaluating the toxicity, accuracy, hallucination, and bias of language +LangEval is a language model evaluation tool for evaluating the toxicity, accuracy, hallucination, and bias of language models. ## Installation ```bash -pip install langval +pip install langeval ``` ## Usage @@ -16,11 +16,11 @@ from unittest import TestCase from langchain_openai import ChatOpenAI -from langval.eval.langchain import LangchainEval -from langval.model import Validation +from langeval.eval.langchain import LangchainEval +from langeval.model import Validation llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.3) -langeval = LangchainEval( +leval = LangchainEval( llm, validation=Validation(toxicity=0.2, accuracy=0.9, hallucination=0.2, bias=0.1) ) @@ -28,7 +28,7 @@ langeval = LangchainEval( class TestEval(TestCase): model = llm - @langeval.question('What is the capital of France?') + @leval.question('What is the capital of France?') def test_eval(self): return 'paris' @@ -40,4 +40,4 @@ Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTIN ## License -langval is licensed under the [MIT License](LICENSE). +langeval is licensed under the [MIT License](LICENSE). diff --git a/pyproject.toml b/pyproject.toml index ec33ff2..2c4eb87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langeval" -version = "0.0.1" +version = "0.0.2" description = "LangEval is a language model evaluation tool for evaluating the toxicity, accuracy, hallucination, and bias of language models." 
authors = [ { name = "Vasanth Kumar", email = "itsparser@gmail.com" }, diff --git a/src/langeval/__init__.py b/src/langeval/__init__.py new file mode 100644 index 0000000..3c6e2d7 --- /dev/null +++ b/src/langeval/__init__.py @@ -0,0 +1,5 @@ +from . import error +from . import eval +from . import model + +__all__ = ["eval", "error", "model"] diff --git a/src/langval/prompt.py b/src/langeval/_prompt.py similarity index 100% rename from src/langval/prompt.py rename to src/langeval/_prompt.py diff --git a/src/langval/utils.py b/src/langeval/_utils.py similarity index 100% rename from src/langval/utils.py rename to src/langeval/_utils.py diff --git a/src/langval/error/__init__.py b/src/langeval/error/__init__.py similarity index 100% rename from src/langval/error/__init__.py rename to src/langeval/error/__init__.py diff --git a/src/langeval/eval/__init__.py b/src/langeval/eval/__init__.py new file mode 100644 index 0000000..0137818 --- /dev/null +++ b/src/langeval/eval/__init__.py @@ -0,0 +1,4 @@ +from .langchain import LangchainEval +from .base import BaseEval + +__all__ = ["LangchainEval", "BaseEval"] diff --git a/src/langval/eval/base.py b/src/langeval/eval/base.py similarity index 91% rename from src/langval/eval/base.py rename to src/langeval/eval/base.py index 1e824d4..f9b2dc6 100644 --- a/src/langval/eval/base.py +++ b/src/langeval/eval/base.py @@ -3,9 +3,9 @@ from pydantic import BaseModel -from langval.model import EvalMetric, ModuleModel, Validation -from langval.tools import arithemetic, comparison -from langval.utils import check_type +from ..model import EvalMetric, ModuleModel, Validation +from ..tools import arithemetic, comparison +from .._utils import check_type class BaseEval(ABC): @@ -70,13 +70,13 @@ def decorator(_mod): return decorator - def question(cls, q: str = None): + def question(self, q: str = None): def decorator(func): def wrapper_func(*args, **kwargs): expected_answer = func(*args, **kwargs) model = args[0].model result = model.invoke(q) - result = cls.eval( + 
result = self.eval( question=q, expected_answer=expected_answer, answer=result ) print(f"after request {q}\n{result}") diff --git a/src/langval/eval/langchain.py b/src/langeval/eval/langchain.py similarity index 92% rename from src/langval/eval/langchain.py rename to src/langeval/eval/langchain.py index a45f302..ee892b3 100644 --- a/src/langval/eval/langchain.py +++ b/src/langeval/eval/langchain.py @@ -4,10 +4,10 @@ from langchain_core.language_models import BaseChatModel from langchain_core.prompts import PromptTemplate -from langval.error import EvalThreshold -from langval.eval.base import BaseEval -from langval.model import EvalMetric, Validation -from langval.prompt import LANGCHAIN_SYSTEM_PROMPT +from ..error import EvalThreshold +from ..eval.base import BaseEval +from ..model import EvalMetric, Validation +from .._prompt import LANGCHAIN_SYSTEM_PROMPT class LangchainEval(BaseEval): diff --git a/src/langval/model/__init__.py b/src/langeval/model/__init__.py similarity index 100% rename from src/langval/model/__init__.py rename to src/langeval/model/__init__.py diff --git a/src/langval/tools/__init__.py b/src/langeval/tools/__init__.py similarity index 100% rename from src/langval/tools/__init__.py rename to src/langeval/tools/__init__.py diff --git a/src/langval/__init__.py b/src/langval/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/langval/agent.py b/src/langval/agent.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/langval/eval/__init__.py b/src/langval/eval/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_eval.py b/tests/test_eval.py index ba48cb9..aab5327 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -2,8 +2,8 @@ from langchain_openai import ChatOpenAI -from langval.eval.langchain import LangchainEval -from langval.model import Validation +from langeval.eval.langchain import LangchainEval +from langeval.model import Validation llm = 
ChatOpenAI(model="gpt-4o-mini", temperature=0.3) _eval = LangchainEval(