Skip to content

Commit

Permalink
rename fields and classes in (collection_)models.py; adjust tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
Byczong authored and djstrong committed Jan 24, 2025
1 parent f884c98 commit dc258b3
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 126 deletions.
39 changes: 16 additions & 23 deletions collection_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
from models import UserInfo, Metadata, RecursiveRelatedCollection


class CollectionName(BaseModel): # todo: change to CollectionLabel
name: str = Field(title='label from a collection') # todo: change to label
namehash: str = Field(title='namehash of the name') # todo: remove namehash (also from the collections code)
class CollectionLabel(BaseModel):
label: str = Field(title='label from a collection')


class Collection(BaseModel):
collection_id: str = Field(title='id of the collection')
title: str = Field(title='title of the collection')
owner: str = Field(title='ETH address of the collection owner')
number_of_names: int = Field(title='total number of names in the collection')
number_of_labels: int = Field(title='total number of labels in the collection')
last_updated_timestamp: int = Field(title='timestamp in milliseconds of last collection update')
top_names: list[CollectionName] = Field(
title='top names stored in the collection (limited by `limit_names`)', description='can not be greater than 10')
top_labels: list[CollectionLabel] = Field(
title='top labels stored in the collection (limited by `limit_labels`)', description='can not be greater than 10')
types: list[str] = Field(title='list of types to which the collection belongs',
description='example of type is `human`')
avatar_emoji: str = Field(title='avatar emoji associated with this collection')
Expand Down Expand Up @@ -50,7 +49,7 @@ class BaseCollectionRequest(BaseModel):


class BaseCollectionSearchLimitOffsetSort(BaseCollectionRequest):
limit_names: int = Field(10, ge=0, le=10, title='the number of names returned in each collection',
limit_labels: int = Field(10, ge=0, le=10, title='the number of labels returned in each collection',
description='can not be greater than 10')
offset: int = Field(0,
title='offset of the first collection to return (used for pagination)',
Expand All @@ -66,8 +65,8 @@ class BaseCollectionSearch(BaseCollectionSearchLimitOffsetSort):
description='* set to null if you want to disable the penalization\n'
'* if the penalization algorithm is turned on then 3 times more results (than max_related_collections) are retrieved from Elasticsearch')
name_diversity_ratio: Optional[float] = Field(None, examples=[0.5], ge=0.0, le=1.0,
title='similarity value used for adding penalty to collections with similar names to other collections',
description='* if more than name_diversity_ratio % of the names have already been used, penalize the collection\n'
title='similarity value used for adding penalty to collections with similar labels to other collections',
description='* if more than name_diversity_ratio % of the labels have already been used, penalize the collection\n'
'* set to null if you want disable the penalization\n'
'* if the penalization algorithm is turned on then 3 times more results (than `max_related_collections`) '
'are retrieved from Elasticsearch'
Expand Down Expand Up @@ -135,16 +134,16 @@ class CollectionCountByStringRequest(BaseCollectionRequest):

# ======== Collection Membership ========

class CollectionsContainingNameCountRequest(BaseCollectionRequest):
class CollectionsContainingLabelCountRequest(BaseCollectionRequest):
label: str = Field(title='label for which collection membership will be checked', examples=['zeus'])


class CollectionsContainingNameCountResponse(BaseCollectionQueryResponse):
class CollectionsContainingLabelCountResponse(BaseCollectionQueryResponse):
count: Union[int, str] = Field(
title='count of collections containing input label or `1000+` if more than 1000 results')


class CollectionsContainingNameRequest(BaseCollectionSearchLimitOffsetSort):
class CollectionsContainingLabelRequest(BaseCollectionSearchLimitOffsetSort):
label: str = Field(title='label for which membership will be checked for each collection', examples=['zeus'])
mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
max_results: int = Field(3, ge=0, title='max number of collections to return (for each page)',
Expand All @@ -154,7 +153,8 @@ class CollectionsContainingNameRequest(BaseCollectionSearchLimitOffsetSort):
'* if AI - use intelligent endpoint-specific ranking\n'
'* if Relevance - use relevance ranking')

class CollectionsContainingNameResponse(BaseCollectionQueryResponse):

class CollectionsContainingLabelResponse(BaseCollectionQueryResponse):
collections: list[Collection] = Field(title='list of public collections the provided label is a member of')


Expand All @@ -165,7 +165,7 @@ class GetCollectionByIdRequest(BaseCollectionRequest):
# ======== Suggestions from collections ========

class SuggestionFromCollection(BaseModel):
name: str = Field(title="label from a collection") # todo: change to label
label: str = Field(title="label from a collection")
tokenized_label: list[str] = Field(title="suggested tokenization of label")
metadata: Optional[Metadata] = Field(None, title="information how suggestion was generated",
description="if metadata=False this key is absent")
Expand Down Expand Up @@ -193,7 +193,7 @@ class SampleCollectionMembers(BaseModel):

class Top10CollectionMembersRequest(BaseModel):
user_info: Optional[UserInfo] = Field(None, title='information about user making request')
collection_id: str = Field(title='id of the collection to fetch names from', examples=['ri2QqxnAqZT7'])
collection_id: str = Field(title='id of the collection to fetch labels from', examples=['ri2QqxnAqZT7'])
metadata: bool = Field(True, title='return all the metadata in response')
max_recursive_related_collections: int = Field(3, ge=0, le=10,
title='Set to 0 to disable the "recursive related collection search". '
Expand All @@ -209,7 +209,7 @@ class ScrambleCollectionTokens(BaseModel):
metadata: bool = Field(True, title='return all the metadata in response')
method: Literal['left-right-shuffle', 'left-right-shuffle-with-unigrams', 'full-shuffle'] = \
Field('left-right-shuffle-with-unigrams', title='method used to scramble tokens and generate new suggestions',
description='* left-right-shuffle - tokenize names as bigrams and shuffle the right-side tokens (do not use unigrams)'
description='* left-right-shuffle - tokenize labels as bigrams and shuffle the right-side tokens (do not use unigrams)'
'\n* left-right-shuffle-with-unigrams - same as above, but with some tokens swapped with unigrams'
'\n* full-shuffle - shuffle all tokens from bigrams and unigrams and create random bigrams')
n_top_members: int = Field(25, title='number of collection\'s top members to include in scrambling', ge=1)
Expand All @@ -235,10 +235,3 @@ class FetchCollectionMembersRequest(BaseModel):
metadata: bool = Field(
True, title='return all the metadata in response'
)


# refactor models plan:
# [x] apply easy renamings
# [x] separate request forming functions
# 3. adjust collection models and their request forming functions
# 4. check josiah renamings
16 changes: 8 additions & 8 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,23 @@ class Metadata(BaseModel):
description='list of interpretation tags based on which the '
'suggestion has been generated')
cached_status: str = Field(title='cached status',
description='name\'s status cached at the time of application startup')
description='label\'s status cached at the time of application startup')
categories: list[str] = Field(title='domain category',
description='can be either available, taken, recently released or on sale')
cached_interesting_score: Optional[float] = Field(title='cached interesting score',
description='name\'s interesting score cached at the time of '
description='label\'s interesting score cached at the time of '
'application startup')
applied_strategies: list[list[str]] = Field(
title="sequence of steps performed in every pipeline that generated the suggestion"
)
collection_title: Optional[str] = Field(
title='name of the collection',
description='if name has been generated using a collection, '
description='if label has been generated using a collection, '
'then this field would contains its name, else it is null'
)
collection_id: Optional[str] = Field(
title='id of the collection',
description='if name has been generated using a collection, '
description='if label has been generated using a collection, '
'then this field would contains its id, else it is null'
)
grouping_category: Optional[str] = Field(title='grouping category to which this suggestion belongs')
Expand Down Expand Up @@ -153,7 +153,7 @@ class CategoriesParams(BaseModel):
model_config = ConfigDict(frozen=True)


class GroupedNameRequest(BaseModel):
class GroupedLabelRequest(BaseModel):
label: str = Field(title='input label', pattern='^[^.]*$', examples=['zeus'],
description='* cannot contain dots (.)'
'\n* if enclosed in double quotes assuming label is pre-tokenized')
Expand All @@ -169,7 +169,7 @@ class GroupedNameRequest(BaseModel):
title='controls the results of other categories than related (except for "Other Names")')


class NameRequest(BaseModel):
class LabelRequest(BaseModel):
label: str = Field(title='input label', description='cannot contain dots (.)',
pattern='^[^.]*$', examples=['zeus'])
metadata: bool = Field(True, title='return all the metadata in response')
Expand All @@ -188,8 +188,8 @@ class NameRequest(BaseModel):


class Suggestion(BaseModel):
name: str = Field(title="suggested similar label") # todo: change to label
tokenized_label: list[str] = Field(title="original tokenization of suggested name's label")
label: str = Field(title="suggested similar label")
tokenized_label: list[str] = Field(title="suggested tokenization of label")
metadata: Optional[Metadata] = Field(None, title="information how suggestion was generated",
description="if metadata=False this key is absent")

Expand Down
55 changes: 28 additions & 27 deletions tests/test_collections_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_collection_api_metadata(self, test_test_client):
es_time = response_json['metadata'].get('elasticsearch_processing_time_ms', 0)
assert es_time <= response_json['metadata']['processing_time_ms'] <= (t1 - t0) * 1000

@mark.skip(reason='we return only names, without the root name')
@mark.skip(reason='we return only labels, without the root name')
@mark.integration_test
def test_collection_api_eth_suffix(self, test_test_client):
response = test_test_client.post("/find_collections_by_string", json={
Expand All @@ -109,9 +109,9 @@ def test_collection_api_eth_suffix(self, test_test_client):
response_json = response.json()

assert len(response_json['related_collections'] + response_json['other_collections']) <= 5
assert all([member_name['name'].endswith('.eth')
assert all([member_name['label'].endswith('.eth')
for collection in response_json['related_collections'] + response_json['other_collections']
for member_name in collection['top_names']])
for member_name in collection['top_labels']])

@mark.integration_test
def test_collection_api_avatar_emojis_and_images(self, test_test_client):
Expand Down Expand Up @@ -153,7 +153,7 @@ def test_collection_api_instant_search(self, test_test_client):
"max_total_collections": 15,
"name_diversity_ratio": 0.5,
"max_per_type": 3,
"limit_names": 10,
"limit_labels": 10,
})

assert response.status_code == 200
Expand All @@ -172,7 +172,7 @@ def test_collection_api_domain_details(self, test_test_client):
"max_total_collections": 6,
"name_diversity_ratio": 0.5,
"max_per_type": 3,
"limit_names": 10,
"limit_labels": 10,
})

assert response.status_code == 200
Expand All @@ -190,7 +190,7 @@ def test_collection_api_domain_details_pagination_by_string(self, test_test_clie
"max_total_collections": 100,
"name_diversity_ratio": None, # no diversity
"max_per_type": None,
"limit_names": 10,
"limit_labels": 10,
"sort_order": 'Z-A', # sort
"offset": 0, # page 1
"max_related_collections": 100,
Expand All @@ -207,7 +207,7 @@ def test_collection_api_domain_details_pagination_by_string(self, test_test_clie
"max_total_collections": 100,
"name_diversity_ratio": None, # no diversity
"max_per_type": None,
"limit_names": 10,
"limit_labels": 10,
"sort_order": 'Z-A', # sort
"offset": 100, # page 2
"max_related_collections": 100,
Expand Down Expand Up @@ -269,7 +269,7 @@ def test_collection_api_find_collections_by_member_list_az(self, test_test_clien
response = test_test_client.post("/find_collections_by_member", json={
"label": "australia",
"sort_order": "A-Z",
"limit_names": lim,
"limit_labels": lim,
"mode": 'domain_detail',
"offset": 10,
'max_results': 30
Expand All @@ -279,8 +279,8 @@ def test_collection_api_find_collections_by_member_list_az(self, test_test_clien
response_json = response.json()
collection_list = response_json['collections']

# test limit names
assert all([len(c['top_names']) <= lim for c in collection_list])
# test limit labels
assert all([len(c['top_labels']) <= lim for c in collection_list])

# test A-Z sort
titles = [c['title'] for c in collection_list]
Expand Down Expand Up @@ -308,7 +308,7 @@ def test_collection_api_find_collections_by_member_page_out_of_bounds(self, test
response = test_test_client.post("/find_collections_by_member", json={
"label": "softmachine",
"mode": "domain_detail",
"limit_names": 10,
"limit_labels": 10,
"sort_order": 'AI', # sort
"offset": 20, # page out of bounds (offset >= n_matched_collections)
"max_related_collections": 100,
Expand All @@ -331,7 +331,7 @@ def test_collection_api_find_collections_by_collection(self, test_test_client):
"max_total_collections": 10,
"name_diversity_ratio": 0.5,
"max_per_type": 3,
"limit_names": 10,
"limit_labels": 10,
"sort_order": 'Relevance'
})

Expand All @@ -351,7 +351,7 @@ def test_collection_api_find_collections_by_collection_az(self, test_test_client
"min_other_collections": 0,
"max_other_collections": 4,
"max_total_collections": 10,
"limit_names": 6,
"limit_labels": 6,
"offset": 8,
"sort_order": 'A-Z'
})
Expand All @@ -362,8 +362,8 @@ def test_collection_api_find_collections_by_collection_az(self, test_test_client

collection_list = response_json['related_collections']

# test limit names
assert all([len(c['top_names']) <= 6 for c in collection_list])
# test limit labels
assert all([len(c['top_labels']) <= 6 for c in collection_list])

# test A-Z sort
titles = [c['title'] for c in collection_list]
Expand All @@ -383,7 +383,7 @@ def test_collection_api_find_collections_by_collection_not_found(self, test_test
"max_total_collections": 6,
"name_diversity_ratio": 0.5,
"max_per_type": 3,
"limit_names": 10,
"limit_labels": 10,
})
assert response.status_code == 404

Expand All @@ -402,10 +402,10 @@ def test_collection_api_member_dot(self, test_test_client):
assert response.status_code == 422

@mark.integration_test
def test_collection_api_instant_search_limit_names_gt_10(self, test_test_client):
def test_collection_api_instant_search_limit_labels_gt_10(self, test_test_client):
response = test_test_client.post("/find_collections_by_string", json={
"query": "australia",
"limit_names": 11,
"limit_labels": 11,
})
assert response.status_code == 422

Expand Down Expand Up @@ -568,8 +568,8 @@ def test_fetch_collection_members_pagination(self, test_test_client):
response2_json = response2.json()

# Verify different pages return different members
first_page_names = [s['name'] for s in response_json['suggestions']]
second_page_names = [s['name'] for s in response2_json['suggestions']]
first_page_names = [s['label'] for s in response_json['suggestions']]
second_page_names = [s['label'] for s in response2_json['suggestions']]
assert not set(first_page_names).intersection(second_page_names)

@mark.integration_test
Expand Down Expand Up @@ -600,7 +600,7 @@ def test_fetch_collection_members_high_offset(self, test_test_client):

@mark.integration_test
def test_fetch_collection_members_tokenized_names(self, test_test_client):
name2labeltokens = {
label2tokens = {
"dualipa": ("dua", "lipa"),
"thebeatles": ("the", "beatles"),
"davidbowie": ("david", "bowie")
Expand All @@ -615,9 +615,9 @@ def test_fetch_collection_members_tokenized_names(self, test_test_client):
assert response.status_code == 200
response_json = response.json()
for item in response_json['suggestions']:
assert ''.join(item['tokenized_label']) == item['name']
if item['name'] in name2labeltokens:
assert tuple(item['tokenized_label']) == name2labeltokens[item['name']]
assert ''.join(item['tokenized_label']) == item['label']
if item['label'] in label2tokens:
assert tuple(item['tokenized_label']) == label2tokens[item['label']]

@mark.integration_test
def test_get_collection_by_id(self, test_test_client):
Expand All @@ -630,9 +630,10 @@ def test_get_collection_by_id(self, test_test_client):
assert collection['collection_id'] == "ri2QqxnAqZT7"
assert 'title' in collection
assert 'owner' in collection
assert 'number_of_names' in collection
assert 'number_of_labels' in collection
assert 'last_updated_timestamp' in collection
assert 'top_names' in collection
assert 'top_labels' in collection
assert all([tuple(label.keys()) == ('label',) for label in collection['top_labels']])
assert 'types' in collection
assert 'avatar_emoji' in collection
assert 'avatar_image' in collection
Expand Down Expand Up @@ -677,7 +678,7 @@ def test_collection_api_unavailability_find_collections_by_collection(self, test
"min_other_collections": 0,
"max_other_collections": 2,
"max_total_collections": 10,
"limit_names": 6,
"limit_labels": 6,
"offset": 8,
"sort_order": 'A-Z'
})
Expand Down
Loading

0 comments on commit dc258b3

Please sign in to comment.