Skip to content

Commit

Permalink
Add kino and kino_qwen2_5_vl
Browse files Browse the repository at this point in the history
Update

Support navit processor

Add pixels and error handling

Resize for minimal smart resize

Add kino qwen2_5

Fix vdc 499 for azure

Fix gpt model

Revise kino logic for handling inputs

Add error handling for vita

Use qwen vl utils to process visual
  • Loading branch information
kcz358 committed Feb 28, 2025
1 parent 00febf5 commit 7ffac2c
Show file tree
Hide file tree
Showing 16 changed files with 817 additions and 69 deletions.
2 changes: 2 additions & 0 deletions lmms_eval/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
"internvideo2": "InternVideo2",
"internvl": "InternVLChat",
"internvl2": "InternVL2",
"kino": "Kino",
"kino_qwen_2_5": "KinoQwen2_5",
"llama_vid": "LLaMAVid",
"llama_vision": "LlamaVision",
"llava": "Llava",
Expand Down
372 changes: 372 additions & 0 deletions lmms_eval/models/kino.py

Large diffs are not rendered by default.

367 changes: 367 additions & 0 deletions lmms_eval/models/kino_qwen_2_5.py

Large diffs are not rendered by default.

76 changes: 40 additions & 36 deletions lmms_eval/models/vita.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,42 +332,46 @@ def _collate(x):
if "num_beams" not in gen_kwargs:
gen_kwargs["num_beams"] = 1

with torch.inference_mode():
output_ids = self.model.generate(
input_ids,
images=image_tensor,
audios=audios,
do_sample=False,
temperature=gen_kwargs["temperature"],
top_p=gen_kwargs["top_p"],
num_beams=gen_kwargs["num_beams"],
output_scores=True,
return_dict_in_generate=True,
max_new_tokens=gen_kwargs["max_new_tokens"],
use_cache=True,
stopping_criteria=[stopping_criteria],
shared_v_pid_stride=None, # 2#16#8#4#1#None,
)
output_ids = output_ids.sequences
input_token_len = input_ids.shape[1]
if self.model_type == "mixtral-8x7b":
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
if n_diff_input_output > 0:
print(f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids")
output_ids = output_ids[:, input_token_len:]
outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=False)[0]

outputs = outputs.strip()
# Sometimes it contains a ☜, I remove it here
if outputs.startswith(self.tokenizer.decode(145789)):
outputs = outputs[len(self.tokenizer.decode(145789)) :]
if stop_str == "<|im_start|>":
actual_stop_str = "<|im_end|>"
else:
actual_stop_str = stop_str
if outputs.endswith(actual_stop_str):
outputs = outputs[: -len(actual_stop_str)]
outputs = outputs.strip()
try:
with torch.inference_mode():
output_ids = self.model.generate(
input_ids,
images=image_tensor,
audios=audios,
do_sample=False,
temperature=gen_kwargs["temperature"],
top_p=gen_kwargs["top_p"],
num_beams=gen_kwargs["num_beams"],
output_scores=True,
return_dict_in_generate=True,
max_new_tokens=gen_kwargs["max_new_tokens"],
use_cache=True,
stopping_criteria=[stopping_criteria],
shared_v_pid_stride=None, # 2#16#8#4#1#None,
)
output_ids = output_ids.sequences
input_token_len = input_ids.shape[1]
if self.model_type == "mixtral-8x7b":
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
if n_diff_input_output > 0:
print(f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids")
output_ids = output_ids[:, input_token_len:]
outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=False)[0]

outputs = outputs.strip()
# Sometimes it contains a ☜, I remove it here
if outputs.startswith(self.tokenizer.decode(145789)):
outputs = outputs[len(self.tokenizer.decode(145789)) :]
if stop_str == "<|im_start|>":
actual_stop_str = "<|im_end|>"
else:
actual_stop_str = stop_str
if outputs.endswith(actual_stop_str):
outputs = outputs[: -len(actual_stop_str)]
outputs = outputs.strip()
except Exception as e:
eval_logger.info(f"Erro {e} : When generating")
outputs = ""
res.append(outputs)
self.cache_hook.add_partial("generate_until", (prompt, gen_kwargs), outputs)
pbar.update(1)
Expand Down
2 changes: 1 addition & 1 deletion lmms_eval/tasks/air_bench/_default_template_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ dataset_kwargs:
token: True

metadata:
gpt_eval_model_name: gpt-4o
gpt_eval_model_name: gpt-4o-2024-08-06
version: 0.0
2 changes: 1 addition & 1 deletion lmms_eval/tasks/alpaca_audio/alpaca_audio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ metric_list:
process_results: !function utils.alpaca_audio_process_results

metadata:
gpt_eval_model_name: gpt-4o
gpt_eval_model_name: gpt-4o-2024-08-06
version: 0.0
2 changes: 1 addition & 1 deletion lmms_eval/tasks/clotho_aqa/_default_template_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ doc_to_visual: !function utils.clotho_aqa_doc_to_audio
doc_to_text: !function utils.clotho_aqa_doc_to_text

metadata:
gpt_eval_model_name: gpt-4o
gpt_eval_model_name: gpt-4o-2024-08-06
version: 0.0
2 changes: 1 addition & 1 deletion lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ metric_list:
higher_is_better: true
metadata:
version: 0.0
gpt_eval_model_name: "gpt-4-0613"
gpt_eval_model_name: "gpt-4o-2024-08-06"
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
Expand Down
2 changes: 1 addition & 1 deletion lmms_eval/tasks/llava_wilder/_default_template_wilder_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ metric_list:
metadata:
version: 0.0
api_type : openai
gpt_eval_model_name: "gpt-4-vision-preview"
gpt_eval_model_name: "gpt-4o-2024-08-06"
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mathvista/mathvista.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ task:
- mathvista_test
metadata:
version: 0.0
gpt_eval_model_name: "gpt-3.5-turbo"
gpt_eval_model_name: "gpt-4o-2024-08-06"
quick_extract: false
42 changes: 19 additions & 23 deletions lmms_eval/tasks/mathvista/mathvista_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,33 +146,29 @@ def is_valid_triangle(a, b, perimeter):


class MathVistaEvaluator:
API_TYPE = os.getenv("API_TYPE", "openai")

if API_TYPE == "openai":
API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
headers = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}
elif API_TYPE == "azure":
API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
headers = {
"api-key": API_KEY,
"Content-Type": "application/json",
}

def __init__(self, api_key, gpt_model="gpt-3.5-turbo", quick_extract=False):
    """Configure the evaluator's GPT endpoint from environment variables.

    Args:
        api_key: API key used for authentication. If falsy and API_TYPE is
            "azure", falls back to the AZURE_API_KEY environment variable.
        gpt_model: Model name sent in the request payload.
        quick_extract: Whether to use the quick answer-extraction path.

    Raises:
        ValueError: If the API_TYPE environment variable is neither
            "openai" nor "azure".
    """
    self.api_key = api_key
    self.gpt_model = gpt_model
    self.quick_extract = quick_extract
    API_TYPE = os.getenv("API_TYPE", "openai")

    if API_TYPE == "openai":
        self.API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
    elif API_TYPE == "azure":
        self.API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
        self.API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
        # Prefer the explicitly supplied key; previously the AZURE_API_KEY
        # value read above was dead because only self.api_key was ever used.
        self.headers = {
            "api-key": self.api_key or self.API_KEY,
            "Content-Type": "application/json",
        }
    else:
        # Fail fast instead of leaving API_URL/headers unset, which would
        # otherwise surface later as an opaque AttributeError in _post_request.
        raise ValueError(f"Unsupported API_TYPE: {API_TYPE!r} (expected 'openai' or 'azure')")
    self.API_TYPE = API_TYPE

def _post_request(self, payload):
    """POST *payload* as JSON to the configured endpoint and return the parsed body.

    Uses the auth headers prepared in __init__ (``self.headers``), so the
    correct header style ("Authorization" for OpenAI vs "api-key" for Azure)
    is applied for the active API type.

    Raises:
        requests.HTTPError: If the server returns a 4xx/5xx status.
    """
    # NOTE: the previous hard-coded Bearer-token header dict here was dead
    # code — immediately overwritten by self.headers — and would have been
    # the wrong auth scheme for Azure in any case.
    response = requests.post(self.API_URL, headers=self.headers, json=payload, timeout=30)
    response.raise_for_status()
    return response.json()
Expand All @@ -183,8 +179,8 @@ def get_chat_response(self, prompt, temperature=0, max_tokens=256, n=1, patience
]
payload = {"model": self.gpt_model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "n": n}

if self.API_TYPE == "azure":
payload.pop("model")
# if self.API_TYPE == "azure":
# payload.pop("model")

while patience > 0:
patience -= 1
Expand Down
2 changes: 1 addition & 1 deletion lmms_eval/tasks/openhermes/openhermes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ metric_list:
process_results: !function utils.openhermes_process_results

metadata:
gpt_eval_model_name: gpt-4o
gpt_eval_model_name: gpt-4o-2024-08-06
version: 0.0
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ lmms_eval_specific_kwargs:

metadata:
version: 0.0
gpt_eval_model_name: gpt-3.5-turbo-0613
gpt_eval_model_name: gpt-4o-2024-08-06
7 changes: 7 additions & 0 deletions lmms_eval/tasks/video_detail_description/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}
elif API_TYPE == "azure":
API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
headers = {
"api-key": API_KEY,
"Content-Type": "application/json",
}

# A bit ugly here
# But the idea is that we will unzip all the zip files
Expand Down
2 changes: 1 addition & 1 deletion lmms_eval/tasks/wavcaps/wavcaps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ metric_list:
aggregation: !function utils.wavcaps_aggregate_results
higher_is_better: true
metadata:
gpt_eval_model_name: gpt-4o
gpt_eval_model_name: gpt-4o-2024-08-06
version: 0.0
2 changes: 1 addition & 1 deletion lmms_eval/tasks/worldqa/_default_template_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ dataset_kwargs:
cache_dir: multi-hop-reasoning
metadata:
version: 0.0
gpt_eval_model_name: "gpt-4-0613"
gpt_eval_model_name: "gpt-4o-2024-08-06"

0 comments on commit 7ffac2c

Please sign in to comment.