-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathhistory.py
142 lines (106 loc) · 3.62 KB
/
history.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import json
import sys
from typing import List, Union, Optional
from collections import OrderedDict
from datetime import datetime
from pydantic.v1 import BaseModel # v2 throws warnings
import tiktoken
# Fallback model slug: returned by Message.model_str when a message's metadata
# has no model_slug, and used by Message.count_tokens to pick a tokenizer when
# looking up the message's own slug raises KeyError.
DEFAULT_MODEL_SLUG = "gpt-3.5-turbo"
class Author(BaseModel):
    """Author of a message; only the author's ``role`` is modelled."""

    role: str
class ContentPartMetadata(BaseModel):
    """Metadata attached to a content part; holds a required ``dalle`` dict."""

    dalle: dict
class ContentPart(BaseModel):
    """One structured (non-string) entry of ``Content.parts``.

    Only ``content_type`` is required; every other field is optional and
    is ``None`` when absent from the input.
    """

    content_type: str
    asset_pointer: Optional[str] = None
    size_bytes: Optional[int] = None
    width: Optional[int] = None
    height: Optional[int] = None
    # Previously ``Optional[None]``, which collapses to ``NoneType`` and made
    # validation reject every non-null value. Widened to ``Optional[int]`` like
    # the sibling numeric fields; ``None`` is still accepted.
    # NOTE(review): confirm against real export data that fovea is an integer.
    fovea: Optional[int] = None
    metadata: Optional[ContentPartMetadata] = None
class Content(BaseModel):
    """Body of a message: a content type plus optional ``parts`` and ``text``.

    Each element of ``parts`` is either a plain string or a ``ContentPart``.
    """

    content_type: str
    parts: Optional[List[Union[str, ContentPart]]] = None
    text: Optional[str] = None
class MessageMetadata(BaseModel):
    """Per-message metadata; only the optional ``model_slug`` is modelled."""

    model_slug: Optional[str] = None
    # parent_id: Optional[str]  # currently disabled
class Message(BaseModel):
    """A single message of a conversation, with derived display helpers."""

    id: str
    author: Author
    create_time: Optional[float] = None
    update_time: Optional[float] = None
    content: Optional[Content] = None
    metadata: MessageMetadata

    @property
    def text(self) -> str:
        """Return the message body as plain text, or ``""`` if there is none.

        An explicit ``content.text`` wins. Otherwise the parts of a ``text``
        content are stringified and joined with single spaces, and
        ``multimodal_text`` content yields a fixed placeholder string.
        """
        content = self.content
        if not content:
            return ""
        if content.text:
            return content.text
        if content.content_type == 'text' and content.parts:
            return " ".join(str(part) for part in content.parts)
        if content.content_type == 'multimodal_text':
            return "[TODO: process DALL-E and other multimodal]"
        return ""

    @property
    def role(self) -> str:
        """Return the role of the message's author."""
        return self.author.role

    @property
    def created(self) -> datetime:
        """Return ``create_time`` converted to a naive local ``datetime``.

        Raises:
            TypeError: if ``create_time`` is ``None``.
        """
        return datetime.fromtimestamp(self.create_time)

    @property
    def created_str(self) -> str:
        """Return ``created`` formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return self.created.strftime('%Y-%m-%d %H:%M:%S')

    @property
    def model_str(self) -> str:
        """Return the message's model slug, or ``DEFAULT_MODEL_SLUG`` if unset."""
        return self.metadata.model_slug or DEFAULT_MODEL_SLUG

    def count_tokens(self) -> int:
        """Return the number of tokens in ``text``.

        The tokenizer is chosen from ``model_str``; when tiktoken has no
        mapping for that slug, the tokenizer of ``DEFAULT_MODEL_SLUG`` is used.
        """
        try:
            encoding = tiktoken.encoding_for_model(self.model_str)
        except KeyError:
            # Unknown model slug: fall back to the default model's tokenizer.
            encoding = tiktoken.encoding_for_model(DEFAULT_MODEL_SLUG)
        # By default encode() raises ValueError when the text contains a
        # special-token string such as "<|endoftext|>". Message text is
        # arbitrary, so disable that check and tokenize it as ordinary text.
        return len(encoding.encode(self.text, disallowed_special=()))
class MessageMapping(BaseModel):
    """One entry of a conversation's ``mapping``: an id and an optional message."""

    id: str
    message: Optional[Message] = None
class Conversation(BaseModel):
    """A conversation: title, timestamps and an ordered mapping of messages."""

    id: str
    title: Optional[str] = None
    create_time: float
    update_time: float
    mapping: OrderedDict[str, MessageMapping]

    @property
    def messages(self) -> List[Message]:
        """Return, in mapping order, the messages that have non-empty text."""
        return [
            node.message
            for node in self.mapping.values()
            if node.message and node.message.text
        ]

    @property
    def created(self) -> datetime:
        """Return ``create_time`` converted to a naive local ``datetime``."""
        return datetime.fromtimestamp(self.create_time)

    @property
    def created_str(self) -> str:
        """Return ``created`` formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return self.created.strftime('%Y-%m-%d %H:%M:%S')

    @property
    def updated(self) -> datetime:
        """Return ``update_time`` converted to a naive local ``datetime``."""
        return datetime.fromtimestamp(self.update_time)

    @property
    def updated_str(self) -> str:
        """Return ``updated`` formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return self.updated.strftime('%Y-%m-%d %H:%M:%S')

    @property
    def title_str(self) -> str:
        """Return the title, or ``'[Untitled]'`` when it is missing or empty."""
        return self.title or '[Untitled]'

    @property
    def total_length(self) -> float:
        """Return seconds from conversation creation to its latest message.

        Messages whose ``create_time`` is ``None`` are skipped, because
        ``Message.created`` cannot convert them. When no message carries a
        timestamp the result is ``0.0``.
        """
        start_time = self.created
        # Build the message list once; the property re-filters on every access.
        timestamps = [
            msg.created for msg in self.messages if msg.create_time is not None
        ]
        end_time = max(timestamps, default=start_time)
        return (end_time - start_time).total_seconds()
def load_conversations(path: str) -> List[Conversation]:
    """Load and validate conversations from a JSON file.

    Args:
        path: Path of a JSON file whose top level is an iterable of
            conversation objects (each is expanded into ``Conversation``
            keyword arguments).

    Returns:
        One ``Conversation`` per entry, in file order.

    Raises:
        SystemExit: with status 1 if any entry cannot be turned into a
            ``Conversation``; the error text is printed to stderr first.

    Errors from opening or parsing the file are not caught and propagate.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        conversations_json = json.load(f)

    # Load the JSON data into the models.
    try:
        conversations = [Conversation(**conv) for conv in conversations_json]
    except Exception as e:
        print(str(e), file=sys.stderr)
        sys.exit(1)

    print(f"-- Loaded {len(conversations)} conversations")
    return conversations