onyx-dot-app · evan-danswer · Feb 4, 2025 · Jan 20, 2025 · Jan 20, 2025 · Jan 21, 2025
diff --git a/.gitignore b/.gitignore
@@ -7,4 +7,6 @@
 .vscode/
 *.sw?
 /backend/tests/regression/answer_quality/search_test_config.yaml
-/web/test-results/
+/web/test-results/
+backend/onyx/agent_search/main/test_data.json
+backend/tests/regression/answer_quality/test_data.json
-backend/tests/regression/answer_quality/test_data.json
+**/test_data.json
-backend/tests/regression/answer_quality/test_data.json
+**/test_data.json
diff --git a/.vscode/env_template.txt b/.vscode/env_template.txt
@@ -52,3 +52,9 @@ BING_API_KEY=<REPLACE THIS>
 # Enable the full set of Danswer Enterprise Edition features
 # NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
 ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
+
+# Agent Search configs  # TODO: Remove these or give them proper names
+AGENT_RETRIEVAL_STATS=False   # Note: This setting will incur substantial re-ranking effort
+AGENT_RERANKING_STATS=True
+AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
+AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
diff --git a/backend/alembic/versions/98a5008d8711_agent_tracking.py b/backend/alembic/versions/98a5008d8711_agent_tracking.py
@@ -0,0 +1,107 @@
+"""agent_tracking
+
+Revision ID: 98a5008d8711
+Revises: 2f80c6a2550f
+Create Date: 2025-01-29 17:00:00.000001
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.dialects.postgresql import UUID
+
+# revision identifiers, used by Alembic.
+revision = "98a5008d8711"
+down_revision = "2f80c6a2550f"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the agent-search tracking tables and the chat_message flag.
+
+    Creates agent__search_metrics, agent__sub_question, agent__sub_query and
+    the agent__sub_query__search_doc association table, then adds the nullable
+    refined_answer_improvement column to chat_message. Tables are created
+    parent-first so each foreign key targets a table that already exists.
+    """
+    op.create_table(
+        "agent__search_metrics",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("persona_id", sa.Integer(), nullable=True),
+        sa.Column("agent_type", sa.String(), nullable=False),
+        sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("base_duration_s", sa.Float(), nullable=False),
+        sa.Column("full_duration_s", sa.Float(), nullable=False),
+        sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
+        sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
+        sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
+        sa.ForeignKeyConstraint(
+            ["persona_id"],
+            ["persona.id"],
+        ),
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+
+    # Create sub_question table.
+    # Both foreign keys use ON DELETE CASCADE so sub-question rows are removed
+    # together with the chat message / chat session they reference.
+    op.create_table(
+        "agent__sub_question",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "primary_question_id",
+            sa.Integer,
+            sa.ForeignKey("chat_message.id", ondelete="CASCADE"),
+        ),
+        sa.Column(
+            "chat_session_id",
+            UUID(as_uuid=True),
+            sa.ForeignKey("chat_session.id", ondelete="CASCADE"),
+        ),
+        sa.Column("sub_question", sa.Text),
+        sa.Column(
+            "time_created", sa.DateTime(timezone=True), server_default=sa.func.now()
+        ),
+        sa.Column("sub_answer", sa.Text),
+        sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=True),
+        sa.Column("level", sa.Integer(), nullable=False),
+        sa.Column("level_question_num", sa.Integer(), nullable=False),
+    )
+
+    # Create sub_query table.
+    # Same cascade policy: a sub-query is deleted with its parent sub-question
+    # or its chat session.
+    op.create_table(
+        "agent__sub_query",
+        sa.Column("id", sa.Integer, primary_key=True),
+        sa.Column(
+            "parent_question_id",
+            sa.Integer,
+            sa.ForeignKey("agent__sub_question.id", ondelete="CASCADE"),
+        ),
+        sa.Column(
+            "chat_session_id",
+            UUID(as_uuid=True),
+            sa.ForeignKey("chat_session.id", ondelete="CASCADE"),
+        ),
+        sa.Column("sub_query", sa.Text),
+        sa.Column(
+            "time_created", sa.DateTime(timezone=True), server_default=sa.func.now()
+        ),
+    )
+
+    # Create sub_query__search_doc association table
+    # (composite primary key over the two foreign keys).
+    op.create_table(
+        "agent__sub_query__search_doc",
+        sa.Column(
+            "sub_query_id",
+            sa.Integer,
+            sa.ForeignKey("agent__sub_query.id"),
+            primary_key=True,
+        ),
+        sa.Column(
+            "search_doc_id",
+            sa.Integer,
+            sa.ForeignKey("search_doc.id"),
+            primary_key=True,
+        ),
+    )
+
+    # Nullable, so existing chat_message rows need no backfill.
+    op.add_column(
+        "chat_message",
+        sa.Column(
+            "refined_answer_improvement",
+            sa.Boolean(),
+            nullable=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Revert upgrade(): drop the added column, then the agent tables."""
+    op.drop_column("chat_message", "refined_answer_improvement")
+
+    # Reverse of the creation order, so tables holding foreign keys are
+    # dropped before the tables they reference.
+    for table_name in (
+        "agent__sub_query__search_doc",
+        "agent__sub_query",
+        "agent__sub_question",
+        "agent__search_metrics",
+    ):
+        op.drop_table(table_name)
diff --git a/backend/ee/onyx/server/query_and_chat/chat_backend.py b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -179,6 +179,7 @@ def handle_simplified_chat_message(
         chunks_below=0,
         full_doc=chat_message_req.full_doc,
         structured_response_format=chat_message_req.structured_response_format,
+        use_agentic_search=chat_message_req.use_agentic_search,
     )
 
     packets = stream_chat_message_objects(
@@ -301,6 +302,7 @@ def handle_send_message_simple_with_history(
         chunks_below=0,
         full_doc=req.full_doc,
         structured_response_format=req.structured_response_format,
+        use_agentic_search=req.use_agentic_search,
     )
 
     packets = stream_chat_message_objects(

diff --git a/backend/ee/onyx/server/query_and_chat/models.py b/backend/ee/onyx/server/query_and_chat/models.py
@@ -57,6 +57,9 @@ class BasicCreateChatMessageRequest(ChunkContext):
     # https://platform.openai.com/docs/guides/structured-outputs/introduction
     structured_response_format: dict | None = None
 
+    # If True, uses agentic search instead of basic search
+    use_agentic_search: bool = False
+
 
 class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
     # Last element is the new query. All previous elements are historical context
@@ -71,6 +74,8 @@ class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
     # only works if using an OpenAI model. See the following for more details:
     # https://platform.openai.com/docs/guides/structured-outputs/introduction
     structured_response_format: dict | None = None
+    # If True, uses agentic search instead of basic search
+    use_agentic_search: bool = False
 
 
 class SimpleDoc(BaseModel):
@@ -120,9 +125,12 @@ class OneShotQARequest(ChunkContext):
     # will also disable Thread-based Rewording if specified
     query_override: str | None = None
 
-    # If True, skips generative an AI response to the search query
+    # If True, skips generating an AI response to the search query
     skip_gen_ai_answer_generation: bool = False
 
+    # If True, uses agentic search instead of basic search
+    use_agentic_search: bool = False
+
     @model_validator(mode="after")
     def check_persona_fields(self) -> "OneShotQARequest":
         if self.persona_override_config is None and self.persona_id is None:

diff --git a/backend/ee/onyx/server/query_and_chat/query_backend.py b/backend/ee/onyx/server/query_and_chat/query_backend.py
@@ -196,6 +196,8 @@ def get_answer_stream(
         retrieval_details=query_request.retrieval_options,
         rerank_settings=query_request.rerank_settings,
         db_session=db_session,
+        use_agentic_search=query_request.use_agentic_search,
+        skip_gen_ai_answer_generation=query_request.skip_gen_ai_answer_generation,
     )
 
     packets = stream_chat_message_objects(

diff --git a/backend/onyx/agents/agent_search/basic/graph_builder.py b/backend/onyx/agents/agent_search/basic/graph_builder.py
@@ -0,0 +1,97 @@
+from langgraph.graph import END
+from langgraph.graph import START
+from langgraph.graph import StateGraph
+
+from onyx.agents.agent_search.basic.states import BasicInput
+from onyx.agents.agent_search.basic.states import BasicOutput
+from onyx.agents.agent_search.basic.states import BasicState
+from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
+    basic_use_tool_response,
+)
+from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
+from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
+    prepare_tool_input,
+)
+from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def basic_graph_builder() -> StateGraph:
+    """Assemble the (uncompiled) basic search graph.
+
+    Flow: START -> prepare_tool_input -> llm_tool_choice, after which
+    should_continue routes either straight to END or through
+    tool_call -> basic_use_tool_response -> END.
+    """
+    builder = StateGraph(
+        state_schema=BasicState,
+        input=BasicInput,
+        output=BasicOutput,
+    )
+
+    ### Add nodes ###
+
+    # Registered in this order; each node is keyed by its function's name.
+    node_actions = {
+        "prepare_tool_input": prepare_tool_input,
+        "llm_tool_choice": llm_tool_choice,
+        "tool_call": tool_call,
+        "basic_use_tool_response": basic_use_tool_response,
+    }
+    for node_name, node_action in node_actions.items():
+        builder.add_node(node=node_name, action=node_action)
+
+    ### Add edges ###
+
+    builder.add_edge(start_key=START, end_key="prepare_tool_input")
+    builder.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
+
+    # Branch point: should_continue returns either "tool_call" or END.
+    builder.add_conditional_edges(
+        "llm_tool_choice",
+        should_continue,
+        ["tool_call", END],
+    )
+
+    builder.add_edge(start_key="tool_call", end_key="basic_use_tool_response")
+    builder.add_edge(start_key="basic_use_tool_response", end_key=END)
+
+    return builder
+
+
+def should_continue(state: BasicState) -> str:
+    """Choose the successor of llm_tool_choice: END or the "tool_call" node."""
+    if state.tool_choice is None:
+        # If there are no tool calls, basic graph already streamed the answer
+        return END
+    return "tool_call"
+
+
+if __name__ == "__main__":
+    # Manual smoke run: compile the basic graph and invoke it once with a
+    # test config built from the default LLMs and a fixed sample query.
+    # Imports are kept local so importing this module stays lightweight.
+    from onyx.db.engine import get_session_context_manager
+    from onyx.context.search.models import SearchRequest
+    from onyx.llm.factory import get_default_llms
+    from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
+
+    graph = basic_graph_builder()
+    compiled_graph = graph.compile()
+    # Named graph_input (not `input`) to avoid shadowing the builtin.
+    graph_input = BasicInput(_unused=True)
+    primary_llm, fast_llm = get_default_llms()
+    with get_session_context_manager() as db_session:
+        config, _ = get_test_config(
+            db_session=db_session,
+            primary_llm=primary_llm,
+            fast_llm=fast_llm,
+            search_request=SearchRequest(query="How does onyx use FastAPI?"),
+        )
+        # The graph reads its static data from config["metadata"]["config"].
+        compiled_graph.invoke(graph_input, config={"metadata": {"config": config}})
diff --git a/backend/onyx/agents/agent_search/basic/states.py b/backend/onyx/agents/agent_search/basic/states.py
@@ -0,0 +1,35 @@
+from typing import TypedDict
+
+from langchain_core.messages import AIMessageChunk
+from pydantic import BaseModel
+
+from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
+from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
+from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
+
+# States contain values that change over the course of graph execution,
+# Config is for values that are set at the start and never change.
+# If you are using a value from the config and realize it needs to change,
+# you should add it to the state and use/update the version in the state.
+
+
+## Graph Input State
+class BasicInput(BaseModel):
+    """Placeholder input model for the basic graph; carries no real data."""
+
+    # Langgraph needs a nonempty input, but we pass in all static
+    # data through a RunnableConfig.
+    _unused: bool = True
+
+
+## Graph Output State
+class BasicOutput(TypedDict):
+    """Output schema of the basic graph: a single tool_call_chunk entry."""
+
+    # NOTE(review): presumably the accumulated tool-call chunk produced while
+    # consuming the LLM stream - confirm against the node that sets it.
+    tool_call_chunk: AIMessageChunk
+
+
+## Graph State
+class BasicState(
+    BasicInput,
+    ToolChoiceInput,
+    ToolCallUpdate,
+    ToolChoiceUpdate,
+):
+    """Full state of the basic graph.
+
+    Combines the graph input with the tool-choice and tool-call state classes
+    from the orchestration package; it declares no fields of its own.
+    """
+
+    pass
diff --git a/backend/onyx/agents/agent_search/basic/utils.py b/backend/onyx/agents/agent_search/basic/utils.py
@@ -0,0 +1,63 @@
+from collections.abc import Iterator
+from typing import cast
+
+from langchain_core.messages import AIMessageChunk
+from langchain_core.messages import BaseMessage
+from langgraph.types import StreamWriter
+
+from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
+from onyx.chat.models import LlmDoc
+from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
+from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
+from onyx.chat.stream_processing.answer_response_handler import (
+    PassThroughAnswerResponseHandler,
+)
+from onyx.chat.stream_processing.utils import map_document_id_order
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def process_llm_stream(
+    messages: Iterator[BaseMessage],
+    should_stream_answer: bool,
+    writer: StreamWriter,
+    final_search_results: list[LlmDoc] | None = None,
+    displayed_search_results: list[LlmDoc] | None = None,
+) -> AIMessageChunk:
+    """Consume an LLM message stream, emitting answer parts and collecting tool calls.
+
+    Args:
+        messages: Stream of LLM messages. It is the answer itself when no tool
+            is chosen; otherwise it contains AIMessageChunks with tool call
+            information.
+        should_stream_answer: When True, messages without tool-call data are
+            passed through the answer handler and each resulting part is
+            written as a "basic_response" custom event.
+        writer: Stream writer that receives the custom events.
+        final_search_results: Documents given to the citation handler as
+            context and used for the final rank map.
+        displayed_search_results: Documents used for the display rank map.
+
+    Returns:
+        An AIMessageChunk that starts empty and has every tool-call-bearing
+        chunk from the stream added onto it.
+    """
+    tool_call_chunk = AIMessageChunk(content="")
+
+    # The citation handler needs both document lists to be present and
+    # non-empty; in every other case the answer is passed through unchanged.
+    answer_handler: AnswerResponseHandler
+    if final_search_results and displayed_search_results:
+        answer_handler = CitationResponseHandler(
+            context_docs=final_search_results,
+            final_doc_id_to_rank_map=map_document_id_order(final_search_results),
+            display_doc_id_to_rank_map=map_document_id_order(displayed_search_results),
+        )
+    else:
+        answer_handler = PassThroughAnswerResponseHandler()
+
+    # Collected only for the debug log below; joined once at the end instead
+    # of growing a string with += on every chunk.
+    answer_pieces: list[str] = []
+    # This stream will be the llm answer if no tool is chosen. When a tool is chosen,
+    # the stream will contain AIMessageChunks with tool call information.
+    for message in messages:
+        answer_piece = message.content
+        if not isinstance(answer_piece, str):
+            # this is only used for logging, so fine to
+            # just add the string representation
+            answer_piece = str(answer_piece)
+        answer_pieces.append(answer_piece)
+
+        if isinstance(message, AIMessageChunk) and (
+            message.tool_call_chunks or message.tool_calls
+        ):
+            # Tool-call chunks are accumulated and never streamed as answer text.
+            tool_call_chunk += message  # type: ignore
+        elif should_stream_answer:
+            for response_part in answer_handler.handle_response_part(message, []):
+                write_custom_event(
+                    "basic_response",
+                    response_part,
+                    writer,
+                )
+
+    full_answer = "".join(answer_pieces)
+    logger.debug(f"Full answer: {full_answer}")
+    return cast(AIMessageChunk, tool_call_chunk)