Merge pull request #5 from itsparser/dev
Added valid version and blacklisted the env variable
itsparser authored Sep 24, 2024
2 parents cab0491 + 5f1f5db commit 5a5e4ca
Showing 11 changed files with 332 additions and 245 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,8 @@
name: lint
on: [ push, pull_request ]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: chartboost/ruff-action@v1
2 changes: 2 additions & 0 deletions .gitignore
@@ -14,6 +14,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+**.env**
+
 # C extensions
 *.so
 
20 changes: 3 additions & 17 deletions pyproject.toml
@@ -1,7 +1,7 @@
 [project]
-name = "langval"
-version = "0.1.0"
-description = "langval is a language model evaluation tool for evaluating the toxicity, accuracy, hallucination, and bias of language models."
+name = "langeval"
+version = "0.0.1"
+description = "LangEval is a language model evaluation tool for evaluating the toxicity, accuracy, hallucination, and bias of language models."
 authors = [
     { name = "Vasanth Kumar", email = "itsparser@gmail.com" },
     { name = "Adheeban", email = "iamadhee@gmail.com" },
@@ -31,17 +31,3 @@ build-backend = "hatchling.build"
 [tool.hatch.build.targets.wheel]
 sources = ["src"]
 
-
-
-[tool.ruff.lint]
-select = ["F", "I"]
-ignore = ["F401"]
-
-[tool.ruff]
-line-length = 100
-
-[tool.ruff.format]
-quote-style = "single"
-indent-style = "tab"
-docstring-code-format = true
-docstring-code-line-length = 20
78 changes: 78 additions & 0 deletions ruff.toml
@@ -0,0 +1,78 @@
# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pyenv",
    ".pytest_cache",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    ".vscode",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "site-packages",
    "venv",
]

# Same as Black.
line-length = 88
indent-width = 4

# Assume Python 3.8
target-version = "py38"

[lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
ignore = []

# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

# Enable auto-formatting of code examples in docstrings. Markdown,
# reStructuredText code/literal blocks and doctests are all supported.
#
# This is currently disabled by default, but it is planned for this
# to be opt-out in the future.
docstring-code-format = false

# Set the line length limit used when formatting code snippets in
# docstrings.
#
# This only has an effect when the `docstring-code-format` setting is
# enabled.
docstring-code-line-length = "dynamic"
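
As a rough illustration of how this configuration behaves (a hypothetical snippet, not part of the commit): the F rules report unused or undefined names, dummy-variable-rgx exempts underscore-prefixed placeholders, and the [format] settings keep double quotes and 4-space indents.

# Hypothetical module checked with the ruff.toml above (illustrative only).
def describe_total(values):
    _debug = len(values)  # underscore prefix matches dummy-variable-rgx, so F841 is not reported
    return f"total is {sum(values)}"  # double quotes and 4-space indents match [format]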

16 changes: 8 additions & 8 deletions src/langval/error/__init__.py
@@ -1,14 +1,14 @@
 class BaseLangvalError(Exception):
-	"""
-	Base class for all langval errors.
-	"""
+    """
+    Base class for all langval errors.
+    """
 
-	pass
+    pass
 
 
 class EvalThreshold(BaseLangvalError):
-	def __init__(self, breached_value: dict):
-		self.breached_value = breached_value
+    def __init__(self, breached_value: dict):
+        self.breached_value = breached_value
 
-	def __str__(self):
-		return f'Validation failed. {self.breached_value}'
+    def __str__(self):
+        return f"Validation failed. {self.breached_value}"
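
For context, the reformatted exception is raised with the breached metrics and rendered via __str__ roughly as below (hypothetical usage; the metrics dict is invented for illustration).

from langval.error import EvalThreshold

try:
    raise EvalThreshold({"toxicity": 0.42})  # invented breached-threshold values
except EvalThreshold as err:
    print(err)  # -> Validation failed. {'toxicity': 0.42}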
126 changes: 66 additions & 60 deletions src/langval/eval/base.py
@@ -1,7 +1,6 @@
 from abc import ABC, abstractmethod
-from typing import Any, override
+from typing import Any
 
-from langchain_core.prompts import ChatPromptTemplate
 from pydantic import BaseModel
 
 from langval.model import EvalMetric, ModuleModel, Validation
@@ -10,72 +9,79 @@
 
 
 class BaseEval(ABC):
-	"""
-	Base class for all evaluations in langval
-	"""
+    """
+    Base class for all evaluations in langval
+    """
 
-	registry = {}
-	TOOLS = [arithemetic, comparison]
+    registry = {}
+    TOOLS = [arithemetic, comparison]
 
-	def __init__(self, *args, **kwargs):
-		self.validation = kwargs.get('validation') or Validation()
+    def __init__(self, *args, **kwargs):
+        self.validation = kwargs.get("validation") or Validation()
 
-	@abstractmethod
-	def eval(
-		self, answer: Any, question: Any = None, expected_answer: Any = None
-	) -> dict | BaseModel:
-		"""
-		Evaluates the model, Need to been override in all subclasses
-		Args:
-			answer (Any): The answer to evaluate.
-			question (Any, optional): The question to evaluate. Defaults to None.
-			expected_answer (Any, optional): The expected answer. Defaults to None.
+    @abstractmethod
+    def eval(
+        self, answer: Any, question: Any = None, expected_answer: Any = None
+    ) -> dict | BaseModel:
+        """
+        Evaluates the model, Need to been override in all subclasses
+        Args:
+            answer (Any): The answer to evaluate.
+            question (Any, optional): The question to evaluate. Defaults to None.
+            expected_answer (Any, optional): The expected answer. Defaults to None.
 
-		Returns:
-			dict | BaseModel: The evaluation result.
-		"""
-		pass
+        Returns:
+            dict | BaseModel: The evaluation result.
+        """
+        pass
 
-	def compare(self, metric: EvalMetric, validation: Validation = None):
-		"""
-		Compares the metric with the validation
-		Args:
-			metric (EvalMetric): The metric to compare
-			validation (Validation, optional): The validation to compare with. Defaults to None.
+    def compare(self, metric: EvalMetric, validation: Validation = None):
+        """
+        Compares the metric with the validation
+        Args:
+            metric (EvalMetric): The metric to compare
+            validation (Validation, optional): The validation to compare with. Defaults to None.
 
-		Returns:
-			bool: True if the metric is equal to the validation, False otherwise.
-		"""
-		if not validation:
-			validation = self.validation
-		result, exact_match = validation.compare(metric)
-		return result, exact_match
+        Returns:
+            bool: True if the metric is equal to the validation, False otherwise.
+        """
+        if not validation:
+            validation = self.validation
+        result, exact_match = validation.compare(metric)
+        return result, exact_match
 
-	@classmethod
-	def validate(cls, toxicity: float, accuracy: float, hallucination: float, bias: float):
-		def decorator(_mod):
-			_type = check_type(_mod)
-			mod = cls.registry.setdefault(_type, {})
-			mod[_mod.__name__] = ModuleModel(
-				name=_mod.__name__,
-				type=_type,
-				metrics=Validation(
-					toxicity=toxicity, accuracy=accuracy, hallucination=hallucination, bias=bias
-				),
-			)
+    @classmethod
+    def validate(
+        cls, toxicity: float, accuracy: float, hallucination: float, bias: float
+    ):
+        def decorator(_mod):
+            _type = check_type(_mod)
+            mod = cls.registry.setdefault(_type, {})
+            mod[_mod.__name__] = ModuleModel(
+                name=_mod.__name__,
+                type=_type,
+                metrics=Validation(
+                    toxicity=toxicity,
+                    accuracy=accuracy,
+                    hallucination=hallucination,
+                    bias=bias,
+                ),
+            )
 
-		return decorator
+        return decorator
 
-	def question(cls, q: str = None):
-		def decorator(func):
-			def wrapper_func(*args, **kwargs):
-				expected_answer = func(*args, **kwargs)
-				model = args[0].model
-				result = model.invoke(q)
-				result = cls.eval(question=q, expected_answer=expected_answer, answer=result)
-				print(f'after request {q}\n{result}')
-				return result
+    def question(cls, q: str = None):
+        def decorator(func):
+            def wrapper_func(*args, **kwargs):
+                expected_answer = func(*args, **kwargs)
+                model = args[0].model
+                result = model.invoke(q)
+                result = cls.eval(
+                    question=q, expected_answer=expected_answer, answer=result
+                )
+                print(f"after request {q}\n{result}")
+                return result
 
-			return wrapper_func
+            return wrapper_func
 
-		return decorator
+        return decorator
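
Putting the reformatted pieces together, a concrete evaluator subclasses BaseEval and implements eval; the sketch below is hypothetical (DummyEval, its scoring rule, and the EvalMetric field names are assumptions, not part of this diff).

from typing import Any

from langval.eval.base import BaseEval
from langval.model import EvalMetric, Validation


class DummyEval(BaseEval):
    """Hypothetical concrete evaluator; a real one would score answers with an LLM."""

    def eval(self, answer: Any, question: Any = None, expected_answer: Any = None) -> dict:
        # Invented scoring rule: exact-match accuracy only, for illustration.
        return {"accuracy": 1.0 if answer == expected_answer else 0.0}


# Thresholds are passed via the validation keyword consumed in BaseEval.__init__.
evaluator = DummyEval(
    validation=Validation(toxicity=0.2, accuracy=0.9, hallucination=0.1, bias=0.1)
)
metric = EvalMetric(toxicity=0.0, accuracy=0.95, hallucination=0.05, bias=0.0)  # assumed field names
result, exact_match = evaluator.compare(metric)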