Skip to content

Commit

Permalink
Merge pull request #271 from docqai/feat/file-upload-endpoint
Browse files Browse the repository at this point in the history
feat: file upload endpoint
  • Loading branch information
janaka authored Oct 12, 2024
2 parents dc9fca5 + d6cacf9 commit 9980b47
Show file tree
Hide file tree
Showing 28 changed files with 912 additions and 312 deletions.
3 changes: 3 additions & 0 deletions .copilot-instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- ALWAYS use all the code in the active code file.
- Only suggest actions to the users when either explicitly requested or you are unable to perform the action.
- If you make suggestions that involve checking code, then perform those checks yourself and provide the user with the result.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "docq"
version = "0.13.8"
version = "0.13.11"
description = "Docq.AI - Your private ChatGPT alternative. Securely unlock knowledge from confidential documents."
authors = ["Docq.AI Team <support@docqai.com>"]
maintainers = ["Docq.AI Team <support@docqai.com>"]
Expand Down
29 changes: 20 additions & 9 deletions source/docq/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from enum import Enum
from typing import Any, Optional, Self

from .config import OrganisationFeatureType, SpaceType
from docq.config import OrganisationFeatureType, SpaceType

_SEPARATOR_FOR_STR = ":"
_SEPARATOR_FOR_VALUE = "_"
Expand Down Expand Up @@ -120,25 +120,36 @@ def create_instance(document_link: str, document_text: str, indexed_on: Optional
)
raise e


class AssistantType(Enum):
    """Kinds of assistant (persona) that can be defined.

    Each member's value is its human-readable label.
    """

    # Plain chat persona.
    SIMPLE_CHAT = "Simple Chat"
    # Agent persona.
    AGENT = "Agent"
    # "Ask" persona.
    ASK = "Ask"


@dataclass
class Assistant:
"""An assistant at its core is a system prompt and user prompt template that tunes the LLM to take on a certain persona and behave/respond in a particular way."""

key: str
"""Unique ID for a Persona instance"""
scoped_id: str
"""Scoped ID for a Persona instance."""
name: str
"""Friendly name for the persona"""
type: AssistantType
"""Type of the persona"""
archived: bool
"""Whether the persona is soft deleted or not"""
system_message_content: str
"""Content of the system message. This is where the persona is defined."""
user_prompt_template_content: str
"""Template for the user prompt aka query. This template is used to generate the content for the user prompt/query that will be sent to the LLM (as a user message)."""
llm_settings_collection_key: str
"""The key of the LLM settings collection to use for LLM calls by this assistant. """


class AssistantType(Enum):
"""Persona type."""

SIMPLE_CHAT = "Simple Chat"
AGENT = "Agent"
ASK = "Ask"
created_at: datetime
"""The timestamp when the assistant record was created."""
updated_at: datetime
"""The timestamp when the assistant record was last updated."""
130 changes: 109 additions & 21 deletions source/docq/manage_assistants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging as log
import sqlite3
from contextlib import closing
from datetime import datetime
from datetime import UTC, datetime
from typing import List, Optional

from llama_index.core.base.llms.types import ChatMessage, MessageRole
Expand Down Expand Up @@ -126,8 +126,8 @@
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
# id, name, type, archived, system_prompt_template, user_prompt_template, llm_settings_collection_key, created_at, updated_at, scoped_id
ASSISTANT = tuple[int, str, str, bool, str, str, str, datetime, datetime, str]
# # id, name, type, archived, system_prompt_template, user_prompt_template, llm_settings_collection_key, created_at, updated_at, scoped_id
# ASSISTANT = tuple[int, str, str, bool, str, str, str, datetime, datetime, str]


def _init(org_id: Optional[int] = None) -> None:
Expand Down Expand Up @@ -182,16 +182,71 @@ def get_assistant_fixed(
"""Get the personas."""
result = {}
if assistant_type == AssistantType.SIMPLE_CHAT:
result = {key: Assistant(key=key, **persona, llm_settings_collection_key=llm_settings_collection_key) for key, persona in SIMPLE_CHAT_PERSONAS.items()}
result = {
key: Assistant(
key=key,
type=AssistantType.SIMPLE_CHAT,
archived=False,
**persona,
llm_settings_collection_key=llm_settings_collection_key,
created_at=datetime.now(tz=UTC),
updated_at=datetime.now(tz=UTC),
)
for key, persona in SIMPLE_CHAT_PERSONAS.items()
}
elif assistant_type == AssistantType.AGENT:
result = {key: Assistant(key=key, **persona, llm_settings_collection_key=llm_settings_collection_key) for key, persona in AGENT_PERSONAS.items()}
elif assistant_type == AssistantType.ASK:
result = {key: Assistant(key=key, **persona, llm_settings_collection_key=llm_settings_collection_key) for key, persona in ASK_PERSONAS.items()}
result = {
key: Assistant(
key=key,
type=AssistantType.ASK,
archived=False,
**persona,
llm_settings_collection_key=llm_settings_collection_key,
created_at=datetime.now(tz=UTC),
updated_at=datetime.now(tz=UTC),
)
for key, persona in ASK_PERSONAS.items()
}
else:
result = {
**{key: Assistant(key=key, **persona, llm_settings_collection_key=llm_settings_collection_key) for key, persona in SIMPLE_CHAT_PERSONAS.items()},
**{key: Assistant(key=key, **persona, llm_settings_collection_key=llm_settings_collection_key) for key, persona in AGENT_PERSONAS.items()},
**{key: Assistant(key=key, **persona, llm_settings_collection_key=llm_settings_collection_key) for key, persona in ASK_PERSONAS.items()},
**{
key: Assistant(
key=key,
type=AssistantType.SIMPLE_CHAT,
archived=False,
**persona,
llm_settings_collection_key=llm_settings_collection_key,
created_at=datetime.now(tz=UTC),
updated_at=datetime.now(tz=UTC),
)
for key, persona in SIMPLE_CHAT_PERSONAS.items()
},
**{
key: Assistant(
key=key,
type=AssistantType.AGENT,
archived=False,
**persona,
llm_settings_collection_key=llm_settings_collection_key,
created_at=datetime.now(tz=UTC),
updated_at=datetime.now(tz=UTC),
)
for key, persona in AGENT_PERSONAS.items()
},
**{
key: Assistant(
key=key,
type=AssistantType.ASK,
archived=False,
**persona,
llm_settings_collection_key=llm_settings_collection_key,
created_at=datetime.now(tz=UTC),
updated_at=datetime.now(tz=UTC),
)
for key, persona in ASK_PERSONAS.items()
},
}
return result

Expand All @@ -207,31 +262,37 @@ def get_assistant_or_default(assistant_scoped_id: Optional[str] = None, org_id:
"""
if assistant_scoped_id:
assistant_data = get_assistant(assistant_scoped_id=assistant_scoped_id, org_id=org_id)
return Assistant(
key=str(assistant_data[0]),
name=assistant_data[1],
system_message_content=assistant_data[4],
user_prompt_template_content=assistant_data[5],
llm_settings_collection_key=assistant_data[6],
)
return assistant_data
# return Assistant(
# key=str(assistant_data[0]),
# name=assistant_data[1],
# system_message_content=assistant_data[4],
# user_prompt_template_content=assistant_data[5],
# llm_settings_collection_key=assistant_data[6],
# )
else:
key = "default"
return Assistant(
key=key,
llm_settings_collection_key="azure_openai_with_local_embedding",
scoped_id=f"global_{key}",
type=AssistantType.SIMPLE_CHAT,
archived=False,
**SIMPLE_CHAT_PERSONAS[key],
llm_settings_collection_key="azure_openai_with_local_embedding",
created_at=datetime.now(tz=UTC),
updated_at=datetime.now(tz=UTC),
)


def list_assistants(org_id: Optional[int] = None, assistant_type: Optional[AssistantType] = None) -> list[ASSISTANT]:
def list_assistants(org_id: Optional[int] = None, assistant_type: Optional[AssistantType] = None) -> list[Assistant]:
"""List the assistants.
Args:
org_id (Optional[int]): The current org id. If None then will try to get from global scope table.
assistant_type (Optional[AssistantType]): The assistant type.
Returns:
list[ASSISTANT]: The list of assistants. This includes a compound ID that of ID + scope. This is to avoid ID clashes between global and org scope tables on gets.
list[Assistant]: The list of assistants. Each item carries a compound ID made up of scope + ID (`scoped_id`), which avoids ID clashes between the global and org scope tables on gets.
"""
scope = "global"
if org_id:
Expand All @@ -250,13 +311,28 @@ def list_assistants(org_id: Optional[int] = None, assistant_type: Optional[Assis
) as connection, closing(connection.cursor()) as cursor:
cursor.execute(sql, params)
rows = cursor.fetchall()
# return [
# (row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], f"{scope}_{row[0]}")
# for row in rows
# ]
return [
(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], f"{scope}_{row[0]}")
Assistant(
key=str(row[0]),
name=row[1],
type=row[2],
archived=row[3],
system_message_content=row[4],
user_prompt_template_content=row[5],
llm_settings_collection_key=row[6],
created_at=row[7],
updated_at=row[8],
scoped_id=f"{scope}_{row[0]}",
)
for row in rows
]


def get_assistant(assistant_scoped_id: str, org_id: Optional[int]) -> ASSISTANT:
def get_assistant(assistant_scoped_id: str, org_id: Optional[int]) -> Assistant:
"""Get the assistant.
If just assistant_id then will try to get from global scope table.
Expand Down Expand Up @@ -287,7 +363,19 @@ def get_assistant(assistant_scoped_id: str, org_id: Optional[int]) -> ASSISTANT:
)
else:
raise ValueError(f"No Assistant with: id = '{id_}' in global scope. scope= '{scope}'")
return (row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], assistant_scoped_id)
# return (row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], assistant_scoped_id)
return Assistant(
key=str(row[0]),
name=row[1],
type=AssistantType(row[2].capitalize()),
archived=row[3],
system_message_content=row[4],
user_prompt_template_content=row[5],
llm_settings_collection_key=row[6],
created_at=row[7],
updated_at=row[8],
scoped_id=f"{scope}_{row[0]}",
)


def create_or_update_assistant(
Expand Down
11 changes: 7 additions & 4 deletions source/docq/manage_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,20 @@
from llama_index.core.schema import NodeWithScore
from streamlit import runtime

from .data_source.main import DocumentMetadata
from .domain import SpaceKey
from .manage_spaces import reindex
from .support.store import get_upload_dir, get_upload_file
from docq.data_source.main import DocumentMetadata
from docq.domain import SpaceKey
from docq.manage_spaces import reindex
from docq.support.store import get_upload_dir, get_upload_file


def upload(filename: str, content: bytes, space: SpaceKey) -> None:
    """Save an uploaded file into the space and trigger a reindex of that space.

    Args:
        filename (str): Name of the file, used to resolve the destination path via `get_upload_file`.
        content (bytes): The raw bytes to write to the destination file.
        space (SpaceKey): The space the file is uploaded to.
    """
    destination = get_upload_file(space, filename)
    with open(destination, "wb") as target:
        target.write(content)

    # TODO: refactor to only kick off re-indexing the saved file not the whole space.
    # TODO: add error handling and return success/failure status.
    # TODO: to handle large files and resumable uploads, switch content to BinaryIO and then write chunks in a loop.
    reindex(space)


Expand Down
6 changes: 3 additions & 3 deletions source/docq/manage_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def _format_space(row: Any) -> SPACE:
row: (id, org_id, name, summary, archived, datasource_type, datasource_configs, space_type, created_at, updated_at)
Returns:
tuple[int, int, str, str, bool, str, dict, datetime, datetime] - [id, org_id, name, summary, archived, datasource_type, datasource_configs, created_at, updated_at]
tuple[int, int, str, str, bool, str, dict, str, datetime, datetime] - [id, org_id, name, summary, archived, datasource_type, datasource_configs, space_type, created_at, updated_at]
"""
return (row[0], row[1], row[2], row[3], bool(row[4]), row[5], json.loads(row[6]), row[7], row[8], row[9])

Expand Down Expand Up @@ -150,7 +150,7 @@ def list_space(org_id: int, space_type: Optional[str] = None) -> list[SPACE]:
)

rows = cursor.fetchall()
print("spaces:", rows)

return [_format_space(row) for row in rows]


Expand Down Expand Up @@ -255,7 +255,7 @@ def get_shared_spaces(space_ids: List[int]) -> list[SPACE]:
"""Get shared spaces by ids.
Returns:
list[tuple[int, int, str, str, bool, str, dict, datetime, datetime]] - [id, org_id, name, summary, archived, datasource_type, datasource_configs, created_at, updated_at]
list[tuple[int, int, str, str, bool, str, dict, str, datetime, datetime]] - [id, org_id, name, summary, archived, datasource_type, datasource_configs, space_type, created_at, updated_at]
"""
log.debug("get_shared_spaces(): Getting space with ids=%s", space_ids)
with closing(
Expand Down
17 changes: 12 additions & 5 deletions source/docq/run_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
from llama_index.core.llms import ChatMessage, MessageRole

from docq.config import OrganisationFeatureType
from docq.domain import FeatureKey, SpaceKey
from docq.manage_assistants import Assistant
from docq.domain import Assistant, FeatureKey, SpaceKey
from docq.manage_documents import format_document_sources
from docq.model_selection.main import LlmUsageSettingsCollection
from docq.support.llm import query_error, run_ask, run_chat
from docq.support.store import (
get_history_table_name,
get_history_thread_table_name,
get_public_sqlite_usage_file,
get_sqlite_shared_system_file,
get_sqlite_usage_file,
)

Expand Down Expand Up @@ -125,7 +125,7 @@ def _retrieve_messages(
return rows


def list_thread_history(feature: FeatureKey, id_: Optional[int] = None) -> list[tuple[int, str, int]]:
def list_thread_history(feature: FeatureKey, id_: Optional[int] = None) -> list[tuple[int, str, int, int]]:
"""List threads or a thread if id_ is provided."""
tablename = get_history_thread_table_name(feature.type_)
rows = None
Expand All @@ -137,10 +137,17 @@ def list_thread_history(feature: FeatureKey, id_: Optional[int] = None) -> list[
table=tablename,
)
)

connection.execute(f"ATTACH DATABASE '{get_sqlite_shared_system_file()}' AS db2")
if id_:
rows = cursor.execute(f"SELECT id, topic, created_at FROM {tablename} WHERE id = ?", (id_,)).fetchall() # noqa: S608
rows = cursor.execute(
f"SELECT t.id, t.topic, t.created_at, s.id as space_id FROM {tablename} as t LEFT JOIN db2.spaces AS s ON s.name LIKE 'Thread-' || t.id || ' %' WHERE t.id = ?",
(id_,),
).fetchall() # noqa: S608
else:
rows = cursor.execute(f"SELECT id, topic, created_at FROM {tablename} ORDER BY created_at DESC").fetchall() # noqa: S608
rows = cursor.execute(
f"SELECT t.id, t.topic, t.created_at, s.id as space_id FROM {tablename} as t LEFT JOIN db2.spaces as s ON s.name LIKE 'Thread-' || t.id || ' %' ORDER BY t.created_at DESC",
).fetchall() # noqa: S608

return rows

Expand Down
Loading

0 comments on commit 9980b47

Please sign in to comment.