From 353492cd1cf18a4924a163a721ce30b20a43b8d8 Mon Sep 17 00:00:00 2001 From: Mohamed Cheikh Sidiya Date: Mon, 6 Nov 2023 16:29:23 +0100 Subject: [PATCH] ~ --- convoviz/cli.py | 8 ++--- convoviz/configuration.py | 12 +++---- convoviz/data_analysis.py | 11 +++---- convoviz/long_runs.py | 4 +-- convoviz/models/__init__.py | 7 +--- convoviz/models/_conversation.py | 19 ++++++----- convoviz/models/_conversation_set.py | 6 ++-- convoviz/models/_node.py | 14 +++++--- playground.ipynb | 48 ++++++++++------------------ pyproject.toml | 2 +- tests/test_conversation.py | 4 +-- 11 files changed, 54 insertions(+), 81 deletions(-) diff --git a/convoviz/cli.py b/convoviz/cli.py index d1d0ee2..0f6238c 100644 --- a/convoviz/cli.py +++ b/convoviz/cli.py @@ -35,7 +35,7 @@ def main() -> None: print("Loading data 📂 ...\n") - entire_collection = ConversationSet.from_zip(user.lookup["zip_filepath"]) + entire_collection = ConversationSet.from_zip(user.configs["zip_filepath"]) bkmrklet_json = latest_bookmarklet_json() if bkmrklet_json: @@ -43,7 +43,7 @@ def main() -> None: bkmrklet_collection = ConversationSet.from_json(bkmrklet_json) entire_collection.update(bkmrklet_collection) - output_folder = Path(user.lookup["output_folder"]) + output_folder = Path(user.configs["output_folder"]) # overwrite the output folder if it already exists (might change this in the future) if output_folder.exists() and output_folder.is_dir(): @@ -63,7 +63,7 @@ def main() -> None: generate_week_barplots( entire_collection, graph_folder, - **user.lookup["graph"], + **user.configs["graph"], progress_bar=True, ) @@ -76,7 +76,7 @@ def main() -> None: generate_wordclouds( entire_collection, wordcloud_folder, - **user.lookup["wordcloud"], + **user.configs["wordcloud"], progress_bar=True, ) diff --git a/convoviz/configuration.py b/convoviz/configuration.py index 4504565..1a17ba9 100644 --- a/convoviz/configuration.py +++ b/convoviz/configuration.py @@ -47,13 +47,13 @@ class UserConfigs: def __init__(self) -> None: """Initialize UserConfigs object.""" - # will implement a way to read from a config file later ... + self.configs = DEFAULT_USER_CONFIGS.copy() - self.lookup = DEFAULT_USER_CONFIGS.copy() + # will implement a way to read from a config file later ... def prompt(self) -> None: """Prompt the user for input and update the configs.""" - lookup = self.lookup + lookup = self.configs lookup["zip_filepath"] = qst_path( "Enter the path to the zip file :", @@ -121,7 +121,5 @@ def prompt(self) -> None: def set_model_configs(self) -> None: """Set the configuration for all models.""" - Message.update_configs(self.lookup["message"]) - Conversation.update_configs(self.lookup["conversation"]) - - # will implement a way to save to a config file later ... + Message.update_configs(self.configs["message"]) + Conversation.update_configs(self.configs["conversation"]) diff --git a/convoviz/data_analysis.py b/convoviz/data_analysis.py index 1eb6110..5e0601e 100644 --- a/convoviz/data_analysis.py +++ b/convoviz/data_analysis.py @@ -9,7 +9,7 @@ from collections import defaultdict from datetime import datetime, timezone -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Unpack from matplotlib.figure import Figure from nltk import download as nltk_download # type: ignore[import-untyped] @@ -20,9 +20,7 @@ from .utils import DEFAULT_WORDCLOUD_CONFIGS if TYPE_CHECKING: - from matplotlib.axes import Axes from PIL.Image import Image - from typing_extensions import Unpack from .utils import GraphKwargs, WordCloudKwargs @@ -52,8 +50,8 @@ def generate_week_barplot( x = days y = [weekday_counts[day] for day in days] - fig: Figure = Figure(dpi=300) - ax: Axes = fig.add_subplot() + fig = Figure(dpi=300) + ax = fig.add_subplot() ax.bar(x, y) ax.set_xlabel("Weekday") @@ -117,5 +115,4 @@ def generate_wordcloud( include_numbers=configs.get("include_numbers"), # pyright: ignore[reportGeneralTypeIssues] ).generate(text) - img: Image = wordcloud.to_image() - return img + return wordcloud.to_image() diff --git a/convoviz/long_runs.py b/convoviz/long_runs.py index 019c6b5..9a2ff9e 100644 --- a/convoviz/long_runs.py +++ b/convoviz/long_runs.py @@ -3,13 +3,11 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Unpack from tqdm import tqdm if TYPE_CHECKING: - from typing_extensions import Unpack - from .models import ConversationSet from .utils import GraphKwargs, WordCloudKwargs diff --git a/convoviz/models/__init__.py b/convoviz/models/__init__.py index cfd3ffa..f60a685 100644 --- a/convoviz/models/__init__.py +++ b/convoviz/models/__init__.py @@ -5,9 +5,4 @@ from ._message import Message from ._node import Node -__all__ = [ - "Conversation", - "ConversationSet", - "Message", - "Node", -] +__all__ = ["Conversation", "ConversationSet", "Message", "Node"] diff --git a/convoviz/models/_conversation.py b/convoviz/models/_conversation.py index a28b88c..edbfd80 100644 --- a/convoviz/models/_conversation.py +++ b/convoviz/models/_conversation.py @@ -8,7 +8,7 @@ from datetime import datetime, timedelta from os import utime as os_utime from pathlib import Path -from typing import TYPE_CHECKING, Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar, Unpack from orjson import loads from pydantic import BaseModel @@ -27,7 +27,6 @@ if TYPE_CHECKING: from PIL.Image import Image - from typing_extensions import Unpack from ._message import AuthorRole @@ -90,7 +89,7 @@ def leaf_count(self) -> int: return sum(1 for node in self._all_message_nodes if not node.children_nodes) @property - def chat_link(self) -> str: + def url(self) -> str: """Chat URL.""" return f"https://chat.openai.com/c/{self.conversation_id}" @@ -126,7 +125,7 @@ def model(self) -> str | None: return message.metadata.model_slug if message else None @property - def used_plugins(self) -> list[str]: + def plugins(self) -> list[str]: """List of all ChatGPT plugins used in the conversation.""" return list( { @@ -137,17 +136,17 @@ def used_plugins(self) -> list[str]: ) @property - def custom_instructions(self) -> dict[str, str] | None: + def custom_instructions(self) -> dict[str, str]: """Return custom instructions used for the conversation.""" system_nodes = self._author_nodes("system") if len(system_nodes) < 2: - return None + return {} context_message = system_nodes[1].message if context_message and context_message.metadata.is_user_system_message: - return context_message.metadata.user_context_message_data + return context_message.metadata.user_context_message_data or {} - return None + return {} # TODO: check if this is the same for conversations from the bookmarklet @@ -158,11 +157,11 @@ def yaml(self) -> str: yaml_map = { "title": self.title, - "chat_link": self.chat_link, + "chat_link": self.url, "create_time": self.create_time, "update_time": self.update_time, "model": self.model, - "used_plugins": self.used_plugins, + "used_plugins": self.plugins, "message_count": self.message_count("user", "assistant"), "content_types": self.content_types, "custom_instructions": self.custom_instructions, diff --git a/convoviz/models/_conversation_set.py b/convoviz/models/_conversation_set.py index 971f494..9ca5574 100644 --- a/convoviz/models/_conversation_set.py +++ b/convoviz/models/_conversation_set.py @@ -6,7 +6,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Unpack from orjson import OPT_INDENT_2, dumps, loads from pydantic import BaseModel @@ -19,11 +19,9 @@ if TYPE_CHECKING: from datetime import datetime - from typing import Any from matplotlib.figure import Figure from PIL.Image import Image - from typing_extensions import Unpack from convoviz.utils import GraphKwargs, WordCloudKwargs @@ -91,7 +89,7 @@ def custom_instructions(self) -> list[dict[str, Any]]: instructions_info = { "chat_title": conversation.title, - "chat_link": conversation.chat_link, + "chat_link": conversation.url, "time": conversation.create_time, "custom_instructions": conversation.custom_instructions, } diff --git a/convoviz/models/_node.py b/convoviz/models/_node.py index 52e15b9..1b98935 100644 --- a/convoviz/models/_node.py +++ b/convoviz/models/_node.py @@ -33,14 +33,18 @@ def add_child(self, node: Node) -> None: @classmethod def mapping(cls, mapping: dict[str, Node]) -> dict[str, Node]: """Return a dictionary of connected Node objects, based on the mapping.""" - node_mapping = mapping.copy() + # Initialize connections + for node in mapping.values(): + node.children_nodes = [] # Ensure list is empty to avoid duplicates + node.parent_node = None # Ensure parent_node is None # Connect nodes - for key, value in node_mapping.items(): - for child_id in value.children: - node_mapping[key].add_child(node_mapping[child_id]) + for node in mapping.values(): + for child_id in node.children: + child_node = mapping[child_id] + node.add_child(child_node) - return node_mapping + return mapping @property def header(self) -> str: diff --git a/playground.ipynb b/playground.ipynb index a148b20..cfd2eb7 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -30,26 +30,19 @@ "from __future__ import annotations\n", "\n", "from pathlib import Path\n", - "from typing import TYPE_CHECKING, Callable\n", - "\n", - "from convoviz.models import ConversationSet\n", + "from random import choice\n", + "from typing import Callable\n", "\n", - "if TYPE_CHECKING:\n", - " from convoviz.models import Conversation\n", + "from convoviz.models import Conversation, ConversationSet\n", + "from convoviz.utils import colormaps, font_names\n", "\n", - "conversations_path = Path(\"data\") / \"conversations.json\" # adjust path if needed\n", + "convos_path = Path(\"data\") / \"conversations.json\" # adjust path if needed\n", "output_path = Path(\"output\")\n", "output_path.mkdir(exist_ok=True)\n", "\n", - "collection = ConversationSet.from_json(conversations_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "collection = ConversationSet.from_json(convos_path)\n", + "\n", + "\n", "def clear_output() -> None:\n", " \"\"\"Clear output folder.\"\"\"\n", " for file in output_path.glob(\"*\"):\n", @@ -96,13 +89,13 @@ " f\"Max {description}: {max_stat}\\n\",\n", " )\n", "\n", - " convos_sorted_by_attr = sorted(\n", + " sorted_convos = sorted(\n", " collection.array,\n", " key=attr_func,\n", " reverse=True,\n", " )\n", "\n", - " for convo in convos_sorted_by_attr[:count]:\n", + " for convo in sorted_convos[:count]:\n", " print(\n", " f\"id: {convo.conversation_id}\\n\"\n", " f\"title: {convo.title}\\n\"\n", @@ -110,7 +103,7 @@ " )\n", " file_path = output_path / f\"{convo.title}.md\"\n", " convo.save(file_path)\n", - " print(f\"saved to '{file_path.resolve()}'\\n\")" + " print(f\"saved to '{file_path}'\\n\")" ] }, { @@ -146,7 +139,7 @@ "metadata": {}, "outputs": [], "source": [ - "get_top_convos(lambda c: len(c.used_plugins), \"plugin count\")" + "get_top_convos(lambda c: len(c.plugins), \"plugin count\")" ] }, { @@ -169,27 +162,17 @@ "metadata": {}, "outputs": [], "source": [ - "from random import choice\n", - "\n", - "from convoviz.utils import colormaps, font_names\n", - "\n", "week_groups = collection.group_by_week()\n", - "\n", "week = choice(list(week_groups.keys()))\n", "\n", "sample_conv_set = week_groups[week]\n", "\n", "font_name = choice(font_names())\n", - "\n", "font_path = f\"convoviz/assets/fonts/{font_name}.ttf\"\n", - "\n", "colormap = choice(colormaps())\n", - "\n", - "\n", - "img = sample_conv_set.wordcloud(font_path=font_path, colormap=colormap)\n", - "\n", "print(f\"font: {font_name}\\ncolormap: {colormap}\\n\")\n", "\n", + "img = sample_conv_set.wordcloud(font_path=font_path, colormap=colormap)\n", "img.show()" ] }, @@ -206,7 +189,8 @@ "metadata": {}, "outputs": [], "source": [ - "fig = sample_conv_set.week_barplot(\"Prompts per day\")" + "fig = sample_conv_set.week_barplot(\"Prompts per day\")\n", + "fig.savefig(output_path / \"week_barplot.png\") # pyright: ignore[reportUnknownMemberType]" ] } ], @@ -226,7 +210,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index ee231f1..183fdf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ enable_incomplete_feature = ["Unpack"] [tool.poetry] name = "convoviz" -version = "0.1.5" +version = "0.1.6" description = "Get analytics and visualizations on your ChatGPT data !" authors = ["Mohamed Cheikh Sidiya "] license = "MIT" diff --git a/tests/test_conversation.py b/tests/test_conversation.py index 46e70f6..88bbeb4 100644 --- a/tests/test_conversation.py +++ b/tests/test_conversation.py @@ -27,7 +27,7 @@ def test_leaf_count() -> None: def test_chat_link() -> None: """Test chat_link method.""" - assert conversation.chat_link == f"https://chat.openai.com/c/{CONVERSATION_ID_111}" + assert conversation.url == f"https://chat.openai.com/c/{CONVERSATION_ID_111}" def test_content_types() -> None: @@ -60,7 +60,7 @@ def test_model_slug() -> None: def test_used_plugins() -> None: """Test used_plugins method.""" - assert len(conversation.used_plugins) == 0 + assert len(conversation.plugins) == 0 def test_yaml_header() -> None: