forked from confident-ai/deepeval
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathr.py
61 lines (45 loc) · 1.6 KB
/
r.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from deepeval.metrics.dag import (
VerdictNode,
TaskNode,
NonBinaryJudgementNode,
BinaryJudgementNode,
)
from deepeval.metrics.dag.graph import DeepAcyclicGraph
from deepeval.test_case import LLMTestCaseParams, LLMTestCase
from deepeval.metrics import DAGMetric
# Leaf verdicts for the language question; each verdict carries a fixed score.
language_verdicts = [
    VerdictNode(verdict="english", score=10),
    VerdictNode(verdict="French", score=0),
]
non_binary = NonBinaryJudgementNode(
    criteria="What is the output language?",
    children=language_verdicts,
)

# Outcomes of the word-count check below: the True verdict hands off to the
# language judgement, the False verdict ends with a score of 0.
verdict_node_yes = VerdictNode(verdict=True, child=non_binary)
verdict_node_no = VerdictNode(verdict=False, score=0)

word_count_criteria = (
    "does the list of extracted words contain the same number of words "
    "in the `actual_output`, ignore formatting?"
)
binary = BinaryJudgementNode(
    criteria=word_count_criteria,
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    children=[verdict_node_yes, verdict_node_no],
)

# Root of the graph: a word-extraction task feeding both judgement nodes.
# NOTE(review): `non_binary` is attached here directly *and* underneath
# `verdict_node_yes` -- confirm that having two parents is intended.
task_node = TaskNode(
    instructions="Extract all words from the `actual output`",
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    output_label="list of extracted words",
    children=[binary, non_binary],
)

dag_metric = DAGMetric(
    name="dag",
    root_node=task_node,
    async_mode=False,
)

# Synchronous way of running the same measurement, kept for reference:
# dag_metric.measure(
#     test_case=LLMTestCase(input="..", actual_output="Les miserable")
# )
# print(dag_metric.score)
# print(dag_metric.reason)
async def main() -> None:
    """Measure one sample test case with `dag_metric` and print the outcome.

    Awaits `dag_metric.a_measure` on a fixed `LLMTestCase`, then prints the
    metric's `score` followed by its `reason`, one per line.
    """
    sample_case = LLMTestCase(input="..", actual_output="Les miserable")

    # The results are read off the metric object, so wait for the
    # measurement to finish before touching them.
    await dag_metric.a_measure(test_case=sample_case)

    print(dag_metric.score, dag_metric.reason, sep="\n")
# Imported next to its only use: the script entry point below.
import asyncio

# Run the measurement only when this file is executed as a script, so that
# importing the module does not start an event loop as a side effect.
if __name__ == "__main__":
    asyncio.run(main())