-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideo_prompt.py
129 lines (123 loc) · 4.46 KB
/
video_prompt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import cv2
import base64
import time
from openai import OpenAI
import os
import requests
class VideoPrompt:
    """
    Creates a video prompt object that extracts frames from a video
    and generates a description, a voiceover script and voiceover audio
    for it through the OpenAI API.

    Attributes:
        api_key (str): The OpenAI API key used for every request.
        video_path (str): Path of the video the frames are read from.
        client (OpenAI): Client used for the chat-completion requests.
    """

    # Model and per-frame "resize" value sent with the frame-based requests.
    # NOTE(review): preview model names get retired - confirm this one is
    # still served before relying on it.
    _VISION_MODEL = "gpt-4-vision-preview"
    _FRAME_RESIZE = 768

    # Text-to-speech request settings.
    _SPEECH_URL = "https://api.openai.com/v1/audio/speech"
    _SPEECH_MODEL = "tts-1-1106"
    _SPEECH_VOICE = "onyx"
    # Passed to requests as-is (seconds); kept at the original value.
    _SPEECH_TIMEOUT = 10000

    def __init__(self, api_key, video_path):
        """
        Stores the credentials and video location and builds the API client.

        Parameters:
            api_key (str): The OpenAI API key.
            video_path (str): Path of the mp4 video to work on.
        """
        self.api_key = api_key
        self.video_path = video_path
        self.client = OpenAI(api_key=self.api_key)

    def extract_frames(self, skip_frames=50):
        """
        Extracts frames from a video.

        Every ``skip_frames``-th frame (starting with the first one) is
        JPEG-encoded and returned as a base64 string.

        Parameters:
            skip_frames (int): The number of frames to skip between each frame.

        Returns:
            list: A list of base64 encoded frames.

        Raises:
            ValueError: If the path does not exist, is not a file, does not
                end in ".mp4", cannot be opened as a video, or if
                ``skip_frames`` is not greater than 0.
            RuntimeError: If a selected frame cannot be JPEG-encoded.
        """
        # Validation order is part of the observable behaviour (it decides
        # which message a caller sees), so it is kept as it was.
        if not os.path.exists(self.video_path):
            raise ValueError("Video path does not exist.")
        if skip_frames <= 0:
            raise ValueError("Skip frames must be greater than 0.")
        if not os.path.isfile(self.video_path):
            raise ValueError("Video path is not a file.")
        # Compare case-insensitively so "clip.MP4" is accepted as well.
        if not self.video_path.lower().endswith(".mp4"):
            raise ValueError("Video path is not a valid mp4 file.")

        video = cv2.VideoCapture(self.video_path)
        try:
            # An unreadable file would otherwise silently yield no frames.
            if not video.isOpened():
                raise ValueError("Video could not be opened.")

            base64_frames = []
            frame_count = 0
            while True:
                success, frame = video.read()
                if not success:
                    break
                if frame_count % skip_frames == 0:
                    encoded, buffer = cv2.imencode(".jpg", frame)
                    if not encoded:
                        raise RuntimeError(
                            f"Could not encode frame {frame_count} as JPEG."
                        )
                    base64_frames.append(
                        base64.b64encode(buffer).decode("utf-8")
                    )
                frame_count += 1
            return base64_frames
        finally:
            # Release the capture even when reading or encoding fails.
            video.release()

    def _generate_from_frames(self, instruction, frames, max_tokens):
        """
        Sends one instruction plus the frames to the vision model.

        Parameters:
            instruction (str): The text prompt placed before the frames.
            frames (list): A list of base64 encoded frames from the video.
            max_tokens (int): The ``max_tokens`` value of the request.

        Returns:
            str: The content of the first returned choice.
        """
        content = [instruction]
        content.extend(
            {"image": frame, "resize": self._FRAME_RESIZE} for frame in frames
        )
        result = self.client.chat.completions.create(
            model=self._VISION_MODEL,
            messages=[{"role": "user", "content": content}],
            max_tokens=max_tokens,
        )
        return result.choices[0].message.content

    def generate_description(self, frames):
        """
        Generates a description for the video.

        Parameters:
            frames (list): A list of base64 encoded frames from the video.

        Returns:
            str: The generated description.
        """
        return self._generate_from_frames(
            "These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video.",
            frames,
            max_tokens=200,
        )

    def generate_voiceover_script(self, frames):
        """
        Generates a voiceover script for the video.

        Parameters:
            frames (list): A list of base64 encoded frames from the video.

        Returns:
            str: The generated voiceover script.
        """
        return self._generate_from_frames(
            "These are frames of a video. Create a short voiceover script in the style of David Attenborough. Only include the narration.",
            frames,
            max_tokens=500,
        )

    def generate_voiceover_audio(self, script):
        """
        Generate Voiceover audio for the video.

        Parameters:
            script (str): The voiceover script.

        Returns:
            bytes: The generated audio.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.post(
            self._SPEECH_URL,
            headers={
                "Authorization": f"Bearer {self.api_key}",
            },
            json={
                "model": self._SPEECH_MODEL,
                "input": script,
                "voice": self._SPEECH_VOICE,
            },
            timeout=self._SPEECH_TIMEOUT,
        )
        # Without this an error payload would be handed back as "audio".
        response.raise_for_status()
        # The request is not streamed, so the whole body is already in memory.
        return response.content