feat: add metrics to openvino
Signed-off-by: Markus Schuettler <markus.schuettler@tngtech.com>
mschuettlerTNG committed Feb 26, 2025
1 parent e0281c3 commit ab6e448
Showing 4 changed files with 38 additions and 9 deletions.
35 changes: 34 additions & 1 deletion OpenVINO/openvino_adapter.py
@@ -1,6 +1,7 @@
import threading
from queue import Empty, Queue
import json
import time
import traceback
from typing import Dict, List, Callable
from openvino_interface import LLMInterface
@@ -21,6 +22,11 @@ def __init__(self, llm_interface: LLMInterface):
self.singal = threading.Event()
self.llm_interface = llm_interface
self.should_stop = False
self.num_tokens = 0
self.start_time = 0
self.first_token_time = 0
self.last_token_time = 0
self.is_first_token = True

def put_msg(self, data):
self.msg_queue.put_nowait(data)
@@ -90,7 +96,13 @@ def text_conversation(self, params: LLMParams):


def stream_function(self, output):
if self.is_first_token:
self.first_token_time = time.time()
self.is_first_token = False

self.text_out_callback(output)
self.num_tokens += 1

if self.llm_interface.stop_generate:
self.put_msg("Stopping generation.")
return True # Stop generation
@@ -105,9 +117,30 @@ def text_conversation_run(
try:
self.llm_interface.load_model(params, callback=self.load_model_callback)

# Reset metrics tracking
self.num_tokens = 0
self.start_time = time.time()
self.first_token_time = 0
self.last_token_time = 0
self.is_first_token = True

prompt = params.prompt
full_prompt = convert_prompt(prompt)
self.llm_interface.create_chat_completion(full_prompt, self.stream_function, params.max_tokens)

# Calculate and send metrics
self.last_token_time = time.time()
metrics_data = {
"type": "metrics",
"num_tokens": self.num_tokens,
"total_time": self.last_token_time - self.start_time,
"overall_tokens_per_second": self.num_tokens / (self.last_token_time - self.start_time) if self.num_tokens > 0 else 0,
"second_plus_tokens_per_second": (self.num_tokens - 1) / (self.last_token_time - self.first_token_time) if self.num_tokens > 1 else None,
"first_token_latency": self.first_token_time - self.start_time if self.num_tokens > 0 else None,
"after_token_latency": (self.last_token_time - self.first_token_time) / (self.num_tokens - 1) if self.num_tokens > 1 else None
}
self.put_msg(metrics_data)
self.put_msg({"type": "finish"})

except Exception as ex:
traceback.print_exc()
@@ -166,4 +199,4 @@ def process_rag(
prompt = RAG_PROMPT_FORMAT.format(prompt=prompt, context=context)
if text_out_callback is not None:
text_out_callback(rag_source, 2)
-    return prompt
+    return prompt
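
For context, the timing math this commit adds can be exercised in isolation. The sketch below mirrors the counters from __init__, the first-token stamp from stream_function, and the metrics_data formulas from text_conversation_run; MetricsTracker and the simulated token loop are illustrative stand-ins, not part of the repository.

import time

class MetricsTracker:
    def __init__(self):
        # Same counters the commit adds to the adapter's __init__.
        self.num_tokens = 0
        self.start_time = time.time()
        self.first_token_time = 0.0
        self.is_first_token = True

    def on_token(self):
        # Same logic as stream_function: stamp the first token, count every token.
        if self.is_first_token:
            self.first_token_time = time.time()
            self.is_first_token = False
        self.num_tokens += 1

    def finish(self):
        # Same formulas as the metrics_data dict in text_conversation_run.
        last_token_time = time.time()
        total_time = last_token_time - self.start_time
        return {
            "type": "metrics",
            "num_tokens": self.num_tokens,
            "total_time": total_time,
            "overall_tokens_per_second": self.num_tokens / total_time if self.num_tokens > 0 else 0,
            "second_plus_tokens_per_second": (self.num_tokens - 1) / (last_token_time - self.first_token_time) if self.num_tokens > 1 else None,
            "first_token_latency": self.first_token_time - self.start_time if self.num_tokens > 0 else None,
            "after_token_latency": (last_token_time - self.first_token_time) / (self.num_tokens - 1) if self.num_tokens > 1 else None,
        }

if __name__ == "__main__":
    tracker = MetricsTracker()
    time.sleep(0.2)        # stand-in for prompt processing before the first token
    for _ in range(5):     # five simulated streamed tokens
        tracker.on_token()
        time.sleep(0.05)
    print(tracker.finish())

Note that second_plus_tokens_per_second and after_token_latency exclude the first token, so prompt-processing latency does not skew the steady-state throughput figures.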
4 changes: 2 additions & 2 deletions WebUI/package-lock.json

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion WebUI/src/components/SettingsBasic.vue
@@ -95,7 +95,7 @@
<div class="border-b border-color-spilter flex flex-col gap-5 py-4">
<h2 class="text-center font-bold">Answer</h2>
<div class="flex flex-col gap-2">
<div v-if="textInference.backend !== 'openVINO'" class="flex items-center gap-2">
<div class="flex items-center gap-2">
<div class="flex items-center gap-5">
<p>Performance Metrics</p>
<button
6 changes: 1 addition & 5 deletions WebUI/src/views/Answer.vue
@@ -156,11 +156,7 @@
</button>
</div>
<div
v-if="
textInference.metricsEnabled &&
textInference.backend !== 'openVINO' &&
chat.metrics
"
v-if="textInference.metricsEnabled && chat.metrics"
class="metrics-info text-xs text-gray-400"
>
<span class="mr-2">{{ chat.metrics.num_tokens }} Tokens</span>
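
With the backend filter removed, chat.metrics is populated for openVINO answers from the "metrics" message built in openvino_adapter.py above. A representative payload, with purely illustrative values, would look like:

{
    "type": "metrics",
    "num_tokens": 128,                      # tokens streamed for this answer
    "total_time": 6.4,                      # seconds from generation start to last token
    "overall_tokens_per_second": 20.0,      # 128 / 6.4
    "second_plus_tokens_per_second": 21.2,  # (128 - 1) / (6.4 - 0.41)
    "first_token_latency": 0.41,            # seconds until the first token arrived
    "after_token_latency": 0.047            # mean seconds per token after the first
}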
