-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollection_models.py
237 lines (186 loc) · 14.8 KB
/
collection_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
from datetime import datetime
from typing import Optional, Literal, Union
from pydantic import BaseModel, Field, PositiveInt, field_validator
from pydantic_core.core_schema import ValidationInfo
from namegraph.xcollections.query_builder import SortOrder
from models import UserInfo, Metadata, RecursiveRelatedCollection
class CollectionLabel(BaseModel):
    # Wrapper model carrying one label string taken from a collection.
    label: str = Field(
        title='label from a collection',
    )
class Collection(BaseModel):
    # Description of one collection: identity, owner, size, last update time,
    # a preview of its labels, its types and its avatar.

    # --- identity ---
    collection_id: str = Field(
        title='id of the collection',
    )
    title: str = Field(
        title='title of the collection',
    )
    owner: str = Field(
        title='ETH address of the collection owner',
    )

    # --- size and freshness ---
    number_of_labels: int = Field(
        title='total number of labels in the collection',
    )
    last_updated_timestamp: int = Field(
        title='timestamp in milliseconds of last collection update',
    )

    # --- content preview ---
    top_labels: list[CollectionLabel] = Field(
        title='top labels stored in the collection (limited by `limit_labels`)',
        description='can not be greater than 10',
    )
    types: list[str] = Field(
        title='list of types to which the collection belongs',
        description='example of type is `human`',
    )

    # --- avatar ---
    avatar_emoji: str = Field(
        title='avatar emoji associated with this collection',
    )
    # The only field of this model with a default (None).
    avatar_image: Optional[str] = Field(
        default=None,
        title='avatar image associated with this collection',
        description='for now, is always null',
    )
class CollectionResultMetadata(BaseModel):
    # Counters and timings attached to a collection query response.
    #
    # None of the fields declares a default, so each one must be supplied when
    # the model is built; the Optional ones accept an explicit None.

    total_number_of_matched_collections: Optional[Union[int, str]] = Field(
        title=(
            'number of matched collections before trimming the result '
            'or `1000+` if more than 1000 results'
        ),
        description='return null for `count*` endpoints',
    )
    processing_time_ms: float = Field(
        title='time elapsed for this query in milliseconds',
    )
    elasticsearch_processing_time_ms: Optional[float] = Field(
        title='time elapsed for elasticsearch query in milliseconds',
        description='return null for `count*` endpoints',
    )
    elasticsearch_communication_time_ms: Optional[float] = Field(
        title=(
            'time elapsed for the communication with elasticsearch '
            'from the request sending to response receiving in milliseconds'
        ),
    )
class BaseCollectionQueryResponse(BaseModel):
    # Common base for collection query responses: every response carries
    # a `metadata` block of type CollectionResultMetadata.
    metadata: CollectionResultMetadata = Field(
        title='additional information about collection query response',
    )
# ======== Collection Search ========
class BaseCollectionRequest(BaseModel):
    # Common base for collection requests; the caller may optionally
    # identify itself through `user_info` (defaults to None).
    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )
class BaseCollectionSearchLimitOffsetSort(BaseCollectionRequest):
    # Shared paging parameters for collection search requests: how many labels
    # to return per collection and where the returned page starts.

    # Bounded to 0..10 labels per collection.
    limit_labels: int = Field(
        10,
        ge=0,
        le=10,
        title='the number of labels returned in each collection',
        description='can not be greater than 10',
    )
    # ge=0: a negative page start is never meaningful, so it is rejected at
    # validation time, consistent with FetchCollectionMembersRequest.offset.
    offset: int = Field(
        0,
        ge=0,
        title='offset of the first collection to return (used for pagination)',
        description='DO NOT use pagination with diversity algorithm',
    )
class BaseCollectionSearch(BaseCollectionSearchLimitOffsetSort):
    # Search parameters for related collections: page size plus two optional
    # diversity penalties (per-type and per-label). Both penalties are off
    # (None) by default.

    max_related_collections: int = Field(
        default=3,
        ge=0,
        title='max number of related collections to return (for each page)',
        description=(
            'return collections at [offset, offset + max_related_collections) '
            'positions (order as in sort_order)'
        ),
    )

    max_per_type: Optional[int] = Field(
        default=None,
        examples=[3],
        title='number of collections with the same type which are not penalized',
        description=(
            '* set to null if you want to disable the penalization\n'
            '* if the penalization algorithm is turned on then 3 times more results '
            '(than max_related_collections) are retrieved from Elasticsearch'
        ),
    )

    # Ratio constrained to the closed interval [0.0, 1.0].
    label_diversity_ratio: Optional[float] = Field(
        default=None,
        examples=[0.5],
        ge=0.0,
        le=1.0,
        title=(
            'similarity value used for adding penalty to collections '
            'with similar labels to other collections'
        ),
        description=(
            '* if more than label_diversity_ratio % of the labels have already been used, '
            'penalize the collection\n'
            '* set to null if you want disable the penalization\n'
            '* if the penalization algorithm is turned on then 3 times more results '
            '(than `max_related_collections`) are retrieved from Elasticsearch'
        ),
    )
class BaseCollectionSearchWithOther(BaseCollectionSearch):  # instant search, domain details
    # Adds limits for "other" collections on top of the related-collection
    # search parameters and enforces the cross-field constraints stated in the
    # field descriptions below.
    #
    # Field declaration order matters: each validator reads earlier fields from
    # `info.data`, which only contains fields declared (and successfully
    # validated) before the one being validated.

    min_other_collections: int = Field(0, ge=0, title='min number of other collections to return')

    # validate_default=True: pydantic does not run validators on a field that is
    # left at its default, so without it the constraint below was silently
    # skipped whenever the client omitted `max_other_collections`
    # (e.g. min_other_collections=5 with the default max of 3 was accepted).
    max_other_collections: int = Field(
        3,
        ge=0,
        validate_default=True,
        title='max number of other collections to return',
        description='constraint: \n'
                    '* min_other_collections <= max_other_collections\n'
                    '\nif not met, 422 status code is returned',
    )

    # validate_default=True for the same reason: the constraints must also hold
    # when `max_total_collections` is omitted and the default of 6 is used.
    max_total_collections: int = Field(
        6,
        ge=0,
        validate_default=True,
        title='max number of total (related + other) collections to return',
        description='constraints: \n'
                    '* max_other_collections <= max_total_collections\n'
                    '* min_other_collections + max_related_collections <= max_total_collections\n'
                    '\nif not met, 422 status code is returned',
    )

    @field_validator('max_other_collections')
    @classmethod
    def max_other_between_min_other_and_max_total(cls, v: int, info: ValidationInfo) -> int:
        """Check `min_other_collections <= max_other_collections`.

        :param v: candidate value of `max_other_collections`
        :param info: validation context; `info.data` holds the earlier fields
        :return: `v` unchanged when the constraint holds
        :raises ValueError: when `min_other_collections` exceeds `v`
        """
        # The key is absent when `min_other_collections` failed its own
        # validation; that error is reported separately, so skip the check.
        if 'min_other_collections' in info.data and info.data['min_other_collections'] > v:
            raise ValueError('min_other_collections must not be greater than max_other_collections')
        return v

    @field_validator('max_total_collections')
    @classmethod
    def max_related_between_min_other_and_max_total(cls, v: int, info: ValidationInfo) -> int:
        """Check both upper-bound constraints on `max_total_collections`.

        Enforces `max_other_collections <= max_total_collections` and
        `min_other_collections + max_related_collections <= max_total_collections`.

        :param v: candidate value of `max_total_collections`
        :param info: validation context; `info.data` holds the earlier fields
        :return: `v` unchanged when both constraints hold
        :raises ValueError: when either constraint is violated
        """
        # Each check is skipped when a field it depends on is missing from
        # `info.data` (i.e. that field already failed validation).
        if 'max_other_collections' in info.data and v < info.data['max_other_collections']:
            raise ValueError('max_other_collections must not be greater than max_total_collections')
        if 'min_other_collections' in info.data and 'max_related_collections' in info.data and \
                info.data['min_other_collections'] + info.data['max_related_collections'] > v:
            raise ValueError(
                'min_other_collections + max_related_collections must not be greater than max_total_collections')
        return v
class CollectionSearchByString(BaseCollectionSearchWithOther):  # instant search, domain details
    # Search request driven by a free-text query. Supports all four sort
    # orders and defaults to SortOrder.AI.

    # Pattern rejects empty strings and any string containing a dot.
    query: str = Field(
        title='input query (with or without spaces) which is used to search for template collections',
        description='can not contain dots (.)',
        pattern='^[^.]+$',
        examples=['zeus god'],
    )
    # Only the two literal values listed in the pattern are accepted.
    mode: str = Field(
        default='instant',
        title='request mode: instant, domain_detail',
        pattern=r'^(instant|domain_detail)$',
    )
    sort_order: Literal[SortOrder.AZ, SortOrder.ZA, SortOrder.AI, SortOrder.RELEVANCE] = Field(
        default=SortOrder.AI,
        title='order of the resulting collections',
        description=(
            '* if A-Z or Z-A - sort by title (alphabetically ascending/descending)\n'
            '* if AI - use intelligent endpoint-specific ranking '
            '(with Learning to Rank for optimal results)\n'
            '* if Relevance - use relevance ranking'
        ),
    )
class CollectionSearchByCollection(BaseCollectionSearchWithOther):  # collection_details
    # Search request seeded by an existing collection id. SortOrder.AI is not
    # among the accepted sort orders here; the default is SortOrder.RELEVANCE.

    collection_id: str = Field(
        title='id of the collection used for search',
        examples=['ri2QqxnAqZT7'],
    )
    sort_order: Literal[SortOrder.AZ, SortOrder.ZA, SortOrder.RELEVANCE] = Field(
        default=SortOrder.RELEVANCE,
        title='order of the resulting collections',
        description=(
            '* if A-Z or Z-A - sort by title (alphabetically ascending/descending)\n'
            '* if Relevance - use relevance ranking'
        ),
    )
class CollectionSearchResponse(BaseCollectionQueryResponse):
    # Search result split into two lists of Collection objects:
    # related collections and other collections.
    related_collections: list[Collection] = Field(
        title='list of related collections',
    )
    other_collections: list[Collection] = Field(
        title='list of other collections (if not enough related collections)',
    )
class CollectionCountByStringRequest(BaseCollectionRequest):
    # Count request driven by a free-text query; declares the same `query` and
    # `mode` fields as CollectionSearchByString but no paging or sort options.

    # Pattern rejects empty strings and any string containing a dot.
    query: str = Field(
        title='input query (with or without spaces) which is used to search for template collections',
        description='can not contain dots (.)',
        pattern='^[^.]+$',
        examples=['zeus god'],
    )
    # Only the two literal values listed in the pattern are accepted.
    mode: str = Field(
        default='instant',
        title='request mode: instant, domain_detail',
        pattern=r'^(instant|domain_detail)$',
    )
# ======== Collection Membership ========
class CollectionsContainingLabelCountRequest(BaseCollectionRequest):
    # Request carrying the single label whose collection membership is counted.
    label: str = Field(
        title='label for which collection membership will be checked',
        examples=['zeus'],
    )
class CollectionsContainingLabelCountResponse(BaseCollectionQueryResponse):
    # Count response; `count` is typed int-or-str so that it can carry the
    # `1000+` marker mentioned in the title.
    count: Union[int, str] = Field(
        title=(
            'count of collections containing input label '
            'or `1000+` if more than 1000 results'
        ),
    )
class CollectionsContainingLabelRequest(BaseCollectionSearchLimitOffsetSort):
    # Paged request for the collections that contain a given label.
    # Inherits `limit_labels` and `offset`; adds page size and sort order.

    label: str = Field(
        title='label for which membership will be checked for each collection',
        examples=['zeus'],
    )
    # Only the two literal values listed in the pattern are accepted.
    mode: str = Field(
        default='instant',
        title='request mode: instant, domain_detail',
        pattern=r'^(instant|domain_detail)$',
    )
    max_results: int = Field(
        default=3,
        ge=0,
        title='max number of collections to return (for each page)',
        description=(
            'return collections at [offset, offset + max_results) '
            'positions (order as in sort_order)'
        ),
    )
    sort_order: Literal[SortOrder.AZ, SortOrder.ZA, SortOrder.AI, SortOrder.RELEVANCE] = Field(
        default=SortOrder.AI,
        title='order of the resulting collections',
        description=(
            '* if A-Z or Z-A - sort by title (alphabetically ascending/descending)\n'
            '* if AI - use intelligent endpoint-specific ranking\n'
            '* if Relevance - use relevance ranking'
        ),
    )
class CollectionsContainingLabelResponse(BaseCollectionQueryResponse):
    # Response listing the Collection objects that contain the requested label.
    collections: list[Collection] = Field(
        title='list of public collections the provided label is a member of',
    )
class GetCollectionByIdRequest(BaseCollectionRequest):
    # Request identifying one collection by its id.
    collection_id: str = Field(
        title='id of the collection to fetch',
        examples=['ri2QqxnAqZT7'],
    )
# ======== Suggestions from collections ========
class SuggestionFromCollection(BaseModel):
    # One suggested label together with its tokenization and, optionally,
    # metadata describing how the suggestion was produced.
    label: str = Field(
        title="label from a collection",
    )
    tokenized_label: list[str] = Field(
        title="suggested tokenization of label",
    )
    metadata: Optional[Metadata] = Field(
        default=None,
        title="information how suggestion was generated",
        description="if metadata=False this key is absent",
    )
class CollectionWithSuggestions(BaseModel):
    # Suggestions drawn from one collection, plus that collection's id, title,
    # member count and its related collections. All fields are required.

    suggestions: list[SuggestionFromCollection] = Field(
        title='suggestions from a collection',
    )

    # --- source collection ---
    collection_id: str = Field(
        title='id of the collection',
    )
    collection_title: str = Field(
        title='title of the collection',
    )
    collection_members_count: int = Field(
        title='number of members in the collection',
    )

    related_collections: list[RecursiveRelatedCollection] = Field(
        title='related collections to this collection',
    )
class SampleCollectionMembers(BaseModel):
    # Request to sample members from one collection.

    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )
    collection_id: str = Field(
        title='id of the collection to sample from',
        examples=['qdeq7I9z0_jv'],
    )
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )
    # Required (no default); bounded to 1..100.
    max_sample_size: int = Field(
        title='the maximum number of members to sample',
        ge=1,
        le=100,
        description=(
            'if the collection has less members than max_sample_size, '
            'all the members will be returned'
        ),
        examples=[5],
    )
    # When omitted, the factory derives the seed from the current time,
    # truncated to whole seconds.
    seed: int = Field(
        default_factory=lambda: int(datetime.now().timestamp()),
        title='seed for random number generator',
        description="if not provided (but can't be null), random seed will be generated",
    )
class Top10CollectionMembersRequest(BaseModel):
    # Request for labels of one collection, with an optional recursive lookup
    # of related collections.

    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )
    collection_id: str = Field(
        title='id of the collection to fetch labels from',
        examples=['ri2QqxnAqZT7'],
    )
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )
    # Bounded to 0..10; 0 switches the recursive lookup off (see title text).
    max_recursive_related_collections: int = Field(
        default=3,
        ge=0,
        le=10,
        title=(
            'Set to 0 to disable the "recursive related collection search". '
            'When set to a value between 1 and 10, for each related collection we find, '
            'we also do a (depth 1 recursive) lookup for this many related collections '
            'to the related collection.'
        ),
    )
class ScrambleCollectionTokens(BaseModel):
    # Request to generate suggestions by scrambling tokens taken from the
    # members of one collection.

    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )
    collection_id: str = Field(
        title='id of the collection to take tokens from',
        examples=['3OB_f2vmyuyp'],
    )
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )
    # Exactly one of three scrambling strategies; each is explained in the
    # description text.
    method: Literal['left-right-shuffle', 'left-right-shuffle-with-unigrams', 'full-shuffle'] = Field(
        default='left-right-shuffle-with-unigrams',
        title='method used to scramble tokens and generate new suggestions',
        description=(
            '* left-right-shuffle - tokenize labels as bigrams and shuffle '
            'the right-side tokens (do not use unigrams)\n'
            '* left-right-shuffle-with-unigrams - same as above, '
            'but with some tokens swapped with unigrams\n'
            '* full-shuffle - shuffle all tokens from bigrams and unigrams '
            'and create random bigrams'
        ),
    )
    n_top_members: int = Field(
        default=25,
        title="number of collection's top members to include in scrambling",
        ge=1,
    )
    # Either a positive integer or an explicit null.
    max_suggestions: Optional[PositiveInt] = Field(
        default=10,
        title='maximal number of suggestions to generate',
        examples=[10],
        description=(
            'must be a positive integer or null\n'
            '* number of generated suggestions will be `max_suggestions` or less '
            '(exactly `max_suggestions` if there are enough members)\n'
            '* if null, no tokens are repeated'
        ),
    )
    # When omitted, the factory derives the seed from the current time,
    # truncated to whole seconds.
    seed: int = Field(
        default_factory=lambda: int(datetime.now().timestamp()),
        title='seed for random number generator',
        description="if not provided (but can't be null), random seed will be generated",
    )
class FetchCollectionMembersRequest(BaseModel):
    # Paged request for the members of one collection.

    collection_id: str = Field(
        title='id of the collection to fetch members from',
        examples=['ri2QqxnAqZT7'],
    )

    # --- pagination: skip `offset` members, then return at most `limit` ---
    offset: int = Field(
        default=0,
        title='number of members to skip',
        description='used for pagination',
        ge=0,
    )
    limit: int = Field(
        default=10,
        title='maximum number of members to return',
        description='used for pagination',
        ge=1,
    )

    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )