Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP refactor and add tests #23

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
79 changes: 79 additions & 0 deletions DeepResearchTool/deep_research/summary_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import logging
from typing import Callable, Optional

from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage


class SummaryGenerator:
    """Generates a markdown article answering a user query from researched topics.

    The LLM call is delegated to a ``chat`` callable (prompt in, completion out),
    which defaults to OpenAI GPT-4 via langchain but can be injected — e.g. a
    stub for tests or a different model backend.
    """

    _user_query: str
    # Topic dicts; the prompt template reads the "name", "description",
    # "relevant_because" and "notes_file" keys.
    _topics: list[dict]
    # Maps a notes filename to that file's contents.
    _notes_getter: Callable[[str], str]
    # Maps a prompt string to the model's response text.
    _chat: Callable[[str], str]

    def __init__(
        self,
        user_query: str,
        topics: list[dict],
        notes_getter: Callable[[str], str],
        chat: Optional[Callable[[str], str]] = None,
    ) -> None:
        """Store the query and topics and wire up the chat backend.

        chat: optional prompt->response callable; when omitted, falls back to
        the OpenAI GPT-4 backend.
        """
        self._user_query = user_query
        self._topics = topics
        self._notes_getter = notes_getter
        # Dependency-injection point: tests can pass a stub instead of hitting OpenAI.
        self._chat = chat or self._get_response_from_openai

    def _format_topic(self, topic: dict) -> str:
        """Render one topic (including its fetched notes) as a prompt section."""
        notes = self._notes_getter(topic["notes_file"])
        topic_str = f"""
Topic name: {topic["name"]}
Topic description: {topic["description"]}
Relevant because: {topic["relevant_because"]}
Notes: {notes}
"""
        return topic_str

    def _generate_markdown_prompt(self, user_query: str, topics: list[dict]) -> str:
        """Assemble the complete LLM prompt from the user query and all topics."""
        topics_str_list = [self._format_topic(topic) for topic in topics]
        markdown_prompt = f"""
The user query is: {user_query}

###

Given the following topics and notes about the topic, write an article addressing the user query
the best you can. If there is a question, try to answer it. If the user query has incorrect
facts or assumptions, address that.

Start with a problem statement of some sort based on the user query, then follow up with a conclusion.
After the conclusion, explain how that conclusion was derived from the
topics researched. If needed, create a section for relevant topic, if it is important enough,
and explain how the topic contributes to the conclusion. You do not need to specifically mention
the conclusion when describing topics.

When you can, cite your sources

### The topics are:

{" # next topic # ".join(topics_str_list)}

# Reminder! The conclusion should be helpful and specific. If there are upper and lower bounds or circumstances where something
may be true or false, then define it. If you cannot, then identify further research needed to get there. Do not make anything up!
If you do not know why you know something, then do not mention it, or identify further research needed to confirm it.

Use inline citations.

Markdown file contents:
"""
        return markdown_prompt

    def get_markdown_summary(self) -> str:
        """Build the prompt and return the model's markdown summary."""
        markdown_prompt = self._generate_markdown_prompt(self._user_query, self._topics)
        # Fix: previously the whole prompt was dumped with logging.warning().
        # A prompt dump is diagnostic output, so log it lazily at DEBUG on a
        # module-scoped logger instead of alarming at WARNING on the root logger.
        logging.getLogger(__name__).debug("markdown prompt: %s", markdown_prompt)
        return self._chat(markdown_prompt)

    def _get_response_from_openai(self, markdown_prompt: str) -> str:
        """Default chat backend: send the prompt as a system message to GPT-4."""
        OPEN_AI_MODEL = "gpt-4"
        # temperature=0 for deterministic, fact-focused output.
        chat = ChatOpenAI(model=OPEN_AI_MODEL, temperature=0)
        system_message_prompt = SystemMessage(content=markdown_prompt)
        response = chat([system_message_prompt])
        return response.content
10 changes: 10 additions & 0 deletions DeepResearchTool/deep_research/topics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from dataclasses import dataclass


@dataclass
class Topic:
    """One research topic as tracked in the topics JSON file.

    Instances are serialized with ``dataclasses.asdict`` and re-created with
    ``Topic(**topic_dict)``, so field names must match the file's keys.
    """

    # Human-readable topic title; also used to match entries when updating the topics file.
    name: str
    # Brief explanation of what the topic covers.
    description: str
    # Name of the file holding the research notes gathered for this topic.
    notes_file: str
    # Why this topic is relevant to the user's query.
    relevant_because: str
    # Whether research for this topic has completed; new topics start unresearched.
    researched: bool = False
71 changes: 10 additions & 61 deletions DeepResearchTool/deep_research_writer_tool.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import logging
from typing import Optional, Type
from typing import Any, Optional, Type

from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage
Expand All @@ -10,6 +10,7 @@
from superagi.tools.base_tool import BaseTool

from DeepResearchTool.const import SINGLE_FILE_OUTPUT_FILE, TOPICS_FILE, USER_QUERY_FILE
from DeepResearchTool.deep_research.summary_generator import SummaryGenerator


class DeepResearchWriter(BaseModel):
Expand All @@ -30,73 +31,21 @@ class DeepResearchWriterTool(BaseTool):
llm: Optional[BaseLlm] = None
resource_manager: Optional[FileManager] = None

def _notes_getter(self, notes_file: str) -> str:
    """Return the raw contents of *notes_file*, read through the tool's resource manager."""
    # resource_manager is Optional on the tool; it must have been injected before use.
    assert self.resource_manager

    return self.resource_manager.read_file(notes_file)

def _execute(self, desired_output_format: str | None = None) -> str:
assert self.resource_manager
assert self.llm

self.llm.temperature = 0

user_query = self.resource_manager.read_file(USER_QUERY_FILE)
topics = self.resource_manager.read_file(TOPICS_FILE)

topics_str_list = []

for topic in json.loads(topics):
notes = self.resource_manager.read_file(topic["notes_file"])
# format is:
# name, description, notes_file, relevant_because, researched
topic_str = f"""
Topic name: {topic["name"]}
Topic description: {topic["description"]}
Relevant because: {topic["relevant_because"]}
Notes: {notes}
"""
topics_str_list.append(topic_str)

markdown_prompt = f"""
The user query is: {user_query}

###

Given the following topics and notes about the topic, write an article addressing the user query
the best you can. If there is an question, try to answer it. If the user query has incorrect
facts or assumptions, address that.

Start with a problem statement of some sort based on the user query, then follow up with a conclusion.
After the conclusion, explain how that conclusion was derived from the
topics researched. If needed, create a section for relevant topic, if it is important enough,
and explain how the topic contributes to the conclusion. You do not need to specifically mention
the conclusion when describing topics.

When you can, cite your sources

### The topics are:

{" # next topic # ".join(topics_str_list)}

# Reminder! The conclusion should be helpful and specific. If there are upper and lower bounds or circumstances where something
may be true or false, then define it. If you cannot, then identify further research needed to get there. Do not make anything up!
If you do not know why you know something, then do not mention it, or identify further research needed to confirm it.

Use inline citations.

Markdown file contents:
"""
logging.warning(markdown_prompt)

OPEN_AI_MODEL = "gpt-4-32k" # not yet available
OPEN_AI_MODEL = "gpt-4"

chat = ChatOpenAI(model=OPEN_AI_MODEL, temperature=0)

system_message_prompt = SystemMessage(content=markdown_prompt)
response = chat([system_message_prompt])
content = response.content

# content = self.llm.chat_completion([{"role": "system", "content": markdown_prompt}])[
# "content"
# ]
user_query, topics = self._read_files()

summary_writer = SummaryGenerator(user_query, topics, self._notes_getter)
content = summary_writer.get_markdown_summary()
self.resource_manager.write_file(SINGLE_FILE_OUTPUT_FILE, content)

return f"Deep research completed! Check the resource manager for {SINGLE_FILE_OUTPUT_FILE} to view the result!"
40 changes: 21 additions & 19 deletions DeepResearchTool/topic_managers.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,43 @@
import json
import logging
from dataclasses import asdict, dataclass
from dataclasses import asdict

from superagi.resource_manager.file_manager import FileManager

from DeepResearchTool.const import TOPICS_FILE
from DeepResearchTool.deep_research.topics import Topic


@dataclass
class Topic:
name: str
description: str
notes_file: str
relevant_because: str
researched: bool = False
class ManagedTopic:
def __init__(self, topic: Topic, file_manager: FileManager) -> None:
self.topic = topic
self.file_manager = file_manager

def initialize_notes_file(self, file_manager: FileManager) -> None:
logging.info(f"Initializing notes file: {self.notes_file}")
file_manager.write_file(self.notes_file, json.dumps([]))
def initialize_notes_file(self) -> None:
logging.info(f"Initializing notes file: {self.topic.notes_file}")
self.file_manager.write_file(self.topic.notes_file, json.dumps([]))

def mark_as_researched(self, file_manager: FileManager) -> None:
topics_file = json.loads(file_manager.read_file(TOPICS_FILE))
def mark_as_researched(self) -> None:
topics_file = json.loads(self.file_manager.read_file(TOPICS_FILE))
for topic in topics_file:
if topic["name"] == self.name:
if topic["name"] == self.topic.name:
topic["researched"] = True
break
file_manager.write_file(TOPICS_FILE, json.dumps(topics_file))
self.file_manager.write_file(TOPICS_FILE, json.dumps(topics_file))


class TopicsManager:
def __init__(self, file_manager: FileManager) -> None:
self._file_manager = file_manager

def load_topics(self) -> list[Topic]:
return [Topic(**topic) for topic in json.loads(self._file_manager.read_file(TOPICS_FILE))]
def load_topics(self) -> list[ManagedTopic]:
return [
ManagedTopic(Topic(**topic), self._file_manager)
for topic in json.loads(self._file_manager.read_file(TOPICS_FILE))
]

def write_topics(self, topics: list[Topic]) -> None:
def write_topics(self, topics: list[ManagedTopic]) -> None:
writing_topics = [topic.topic for topic in topics]
self._file_manager.write_file(
TOPICS_FILE, json.dumps([asdict(topic) for topic in topics])
TOPICS_FILE, json.dumps([asdict(topic) for topic in writing_topics])
)
Loading