-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain_instructor.py
64 lines (54 loc) · 2.05 KB
/
main_instructor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from utils import draw_circle, encode_image
import instructor
from instructor import Mode
from openai import OpenAI
from pydantic import BaseModel, Field
from pprint import pprint
from dotenv import load_dotenv
# Load settings from a .env file into the process environment. This must run
# before OpenAI() is constructed (presumably the API key is read from the
# environment at that point -- confirm against the deployment setup).
load_dotenv()
# Wrap the OpenAI client with instructor so chat-completion calls accept a
# `response_model` argument. Mode.MD_JSON selects how instructor requests and
# parses the structured reply (see the instructor docs for the exact format).
client = instructor.patch(OpenAI(), mode=Mode.MD_JSON)
class ObjectDetection(BaseModel):
    """
    You are an object detection expert.
    Find object in image. Top left of the image is [0, 0].
    For cases involving the identification of people or animals,
    focus on locating and identifying the face of the person or animal.
    """

    # NOTE(review): this class is passed as `response_model` to the patched
    # client, so the docstring above and the Field descriptions below are
    # presumably forwarded to the model as part of the response schema.
    # Treat them as prompt text: rewording them changes runtime behaviour.

    # Position of the detected object, measured from the image's top-left
    # corner ([0, 0]) as stated in the prompt; units are presumably pixels
    # -- confirm against draw_circle in utils.
    x: int = Field(description="x coordinate of detected object", default=0)
    y: int = Field(description="y coordinate of detected object", default=0)

    # Free-text notes about the object that was located.
    object_found_details: str = Field(
        description="Details of detected object.",
        default="",
    )

    # Free-text summary of the whole image. The keyword used to be misspelled
    # as `descripion`, so this field ended up without a description.
    image_description: str = Field(
        description="Description of image.",
        default="",
    )
def ask_gpt4_vision(system_instrutions, question, image_path):
    """Ask the vision model to locate an object in an image.

    The image at ``image_path`` is base64-encoded via ``encode_image`` and sent
    as a data URL together with the system prompt and the user's question. The
    reply is parsed into an ``ObjectDetection`` instance, whose JSON dump is
    printed before the coordinates are returned.

    Args:
        system_instrutions: Text used as the system message.
        question: Text of the user message (what to look for).
        image_path: Path of the image file handed to ``encode_image``.

    Returns:
        A dict with keys ``"x"`` and ``"y"`` copied from the parsed response.
    """
    # NOTE(review): the data URL is always labelled image/jpeg, regardless of
    # the file extension of image_path -- confirm what encode_image produces.
    image_data_url = f"data:image/jpeg;base64,{encode_image(image_path)}"

    system_message = {"role": "system", "content": system_instrutions}
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ],
    }

    detection = client.chat.completions.create(
        model="gpt-4-vision-preview",
        response_model=ObjectDetection,
        max_tokens=100,
        messages=[system_message, user_message],
    )

    # Echo the full structured reply for inspection; only x/y are returned.
    pprint(detection.model_dump_json())
    return {"x": detection.x, "y": detection.y}
# Alternative sample images (swap one in for image_path to try another picture):
#   assets/kitten-and-puppy.webp
#   assets/puppy.jpg
question = "Detect Fire"
system_instructions = "You are an image recognition expert."
image_path = "assets/fire.png"

# Query the model for the object's coordinates, then hand them to draw_circle
# (from utils), which presumably marks that location on the image.
coordinates = ask_gpt4_vision(system_instructions, question, image_path)
draw_circle(image_path, coordinates)