-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathgenerate_script.py
165 lines (140 loc) · 5.25 KB
/
generate_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# Module setup. The order of the statements below is deliberate: logging-related
# environment variables are assigned before the Google client library is imported.
import os
import re
from dotenv import load_dotenv
# Runs at import time. Presumably this populates os.environ from a local .env
# file (python-dotenv) -- confirm; GOOGLE_API_KEY is read later via os.getenv().
load_dotenv()
# === Set environment variables to suppress warnings ===
os.environ['GRPC_VERBOSITY'] = 'NONE' # Suppress gRPC logs
os.environ['GLOG_minloglevel'] = '3' # Suppress glog logs (3 = FATAL)
# === Initialize absl logging to suppress warnings ===
import absl.logging
absl.logging.set_verbosity('error')
absl.logging.use_absl_handler()
# === Import other modules after setting environment variables ===
import google.generativeai as genai
import PyPDF2
import requests
from bs4 import BeautifulSoup
# === Source readers, script generation, clean-up, and the CLI entry point ===
def read_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path* as one string.

    Pages whose ``extract_text()`` result is falsy are skipped, and the page
    texts are concatenated without any separator.

    Errors are never raised to the caller: a missing file or any other
    failure is reported with ``print`` and an empty string is returned.
    """
    pdf_text = ""
    try:
        with open(pdf_path, 'rb') as pdf_file:
            page_texts = (
                page.extract_text()
                for page in PyPDF2.PdfReader(pdf_file).pages
            )
            # Consume the generator while the file is still open.
            pdf_text = "".join(chunk for chunk in page_texts if chunk)
    except FileNotFoundError:
        print(f"Error: PDF file not found at path: {pdf_path}")
    except Exception as e:
        print(f"Error reading PDF file: {str(e)}")
    return pdf_text
def read_md(md_path):
    """Return the full contents of the Markdown file at *md_path*.

    The file is decoded as UTF-8. A missing file or any other read error is
    reported with ``print`` and results in an empty string rather than an
    exception.
    """
    markdown_text = ""
    try:
        with open(md_path, mode='r', encoding='utf-8') as md_file:
            markdown_text = md_file.read()
    except FileNotFoundError:
        print(f"Error: Markdown file not found at path: {md_path}")
    except Exception as e:
        print(f"Error reading Markdown file: {str(e)}")
    return markdown_text
def read_url(url):
    """Download *url* and return the text extracted from its HTML.

    The request uses a 10-second timeout and ``raise_for_status()`` is called
    on the response before parsing. The body is parsed with BeautifulSoup's
    ``html.parser`` and reduced to text with ``get_text()``.

    Request failures and any other error are reported with ``print`` and
    produce an empty string instead of an exception.
    """
    page_text = ""
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        parsed_page = BeautifulSoup(response.text, 'html.parser')
        page_text = parsed_page.get_text()
    except requests.exceptions.RequestException as e:
        print(f"Error accessing URL: {str(e)}")
    except Exception as e:
        print(f"Error processing URL content: {str(e)}")
    return page_text
def read_txt(txt_path):
    """Return the full contents of the plain-text file at *txt_path*.

    The file is decoded as UTF-8. A missing file or any other read error is
    reported with ``print`` and results in an empty string rather than an
    exception.
    """
    file_text = ""
    try:
        with open(txt_path, mode='r', encoding='utf-8') as txt_file:
            file_text = txt_file.read()
    except FileNotFoundError:
        print(f"Error: Text file not found at path: {txt_path}")
    except Exception as e:
        print(f"Error reading text file: {str(e)}")
    return file_text
def get_content_from_sources():
    """Interactively collect text from any number of sources.

    The user is prompted repeatedly for a source type (``pdf``, ``url``,
    ``txt`` or ``md``) and then for the matching path or URL. Entering
    ``done`` ends the loop; so does end-of-file on stdin (e.g. Ctrl-D or
    exhausted piped input), which is treated the same as ``done``.

    Returns:
        The text of every source that yielded non-empty content, each
        followed by a newline, concatenated in the order entered. An empty
        string if nothing was collected.
    """
    # Source type -> (prompt for its location, function that reads it).
    readers = {
        "pdf": ("Enter PDF file path: ", read_pdf),
        "url": ("Enter URL: ", read_url),
        "md": ("Enter Markdown file path: ", read_md),
        "txt": ("Enter text file path: ", read_txt),
    }
    collected = []
    while True:
        try:
            source_type = input(
                "Enter source type (pdf/url/txt/md) or 'done' to finish: "
            ).lower().strip()
            if source_type == 'done':
                break
            entry = readers.get(source_type)
            if entry is None:
                print("Invalid source type. Please try again.")
                continue
            location_prompt, reader = entry
            location = input(location_prompt).strip()
        except EOFError:
            # stdin was closed: keep what has been gathered so far.
            break
        text = reader(location)
        # The readers return "" on failure; skip those so no blank entry is added.
        if text:
            collected.append(text + "\n")
    return "".join(collected)
def load_prompt_template(template_path='system_instructions_script.txt'):
    """Return the prompt template text read from *template_path*.

    Args:
        template_path: Path of the UTF-8 encoded template file. Defaults to
            ``system_instructions_script.txt``; a relative path is resolved
            by ``open`` against the current working directory.

    Returns:
        The entire file contents as a string.

    Raises:
        FileNotFoundError: If the template file does not exist. The message
            names the path that was tried.
    """
    try:
        with open(template_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Prompt template file not found in {template_path}"
        ) from err
def create_podcast_script(content):
    """Ask Gemini to turn *content* into a podcast script.

    The Gemini client is configured with the ``GOOGLE_API_KEY`` environment
    variable and the ``gemini-2.0-flash-exp`` model is used. The prompt is
    the text returned by ``load_prompt_template()`` followed by a blank line
    and ``Content: <content>``.

    Returns:
        The text of the model response, or ``None`` if any step raised; in
        that case the error is reported with ``print``.
    """
    try:
        api_key = os.getenv('GOOGLE_API_KEY')
        genai.configure(api_key=api_key)
        gemini = genai.GenerativeModel('gemini-2.0-flash-exp')
        # The template is loaded inside the try block, so a missing template
        # file is reported by the handler below as well.
        prompt_template = load_prompt_template()
        reply = gemini.generate_content(f"{prompt_template}\n\nContent: {content}")
        script_text = reply.text
    except Exception as e:
        print(f"Error generating content: {str(e)}")
        script_text = None
    return script_text
def clean_podcast_script(script):
    """Strip everything that precedes the first speaker line of *script*.

    A speaker line is one that begins, at its very first character, with
    ``Speaker A:`` or ``Speaker B:``. When such a line exists, the result is
    that line and all following lines joined with ``\\n`` (so original line
    terminators are normalised and a trailing newline is dropped). When no
    line qualifies, *script* is returned unchanged.
    """
    podcast_start_pattern = re.compile(r"^(Speaker A:|Speaker B:)")
    lines = script.splitlines()
    # Index of the first dialogue line, or None when there is none.
    first_dialogue = next(
        (
            index
            for index, line in enumerate(lines)
            if podcast_start_pattern.match(line)
        ),
        None,
    )
    if first_dialogue is None:
        return script
    return '\n'.join(lines[first_dialogue:])
def main():
    """Run the interactive pipeline: collect sources, generate, clean, save.

    Content is gathered with ``get_content_from_sources()``. If nothing was
    collected, generation is skipped so that an empty prompt is never sent to
    the model. Otherwise the script returned by ``create_podcast_script()``
    is passed through ``clean_podcast_script()`` and written to
    ``podcast_script.txt`` (UTF-8) in the current working directory.
    """
    # Get content from multiple sources
    content = get_content_from_sources()
    if not content.strip():
        # Every source failed or none was entered; there is nothing to turn
        # into a script.
        print("No content was collected from any source; skipping script generation.")
        return

    # Generate podcast script
    script = create_podcast_script(content)
    if not script:
        # None (an error was already printed) or an empty response.
        print("No podcast script was generated; nothing was saved.")
        return

    # Clean the script before saving
    cleaned_script = clean_podcast_script(script)
    with open("podcast_script.txt", "w", encoding='utf-8') as f:
        f.write(cleaned_script)
    print("Podcast script saved successfully!")


if __name__ == "__main__":
    main()