Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [sc-26070] Disable adding ".eth" suffix to suggestions #327

Merged
merged 10 commits into from
Jan 26, 2025
119 changes: 99 additions & 20 deletions collection_models.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
from datetime import datetime
from typing import Optional, Literal, Union
from namegraph.xcollections.query_builder import SortOrder
from pydantic import BaseModel, Field, field_validator
from pydantic_core.core_schema import FieldValidationInfo
from pydantic import BaseModel, Field, PositiveInt, field_validator
from pydantic_core.core_schema import ValidationInfo

from models import UserInfo
from namegraph.xcollections.query_builder import SortOrder
from models import UserInfo, Metadata, RecursiveRelatedCollection


class CollectionName(BaseModel):
name: str = Field(title='name with `.eth`')
namehash: str = Field(title='namehash of the name')
class CollectionLabel(BaseModel):
label: str = Field(title='label from a collection')


class Collection(BaseModel):
collection_id: str = Field(title='id of the collection')
title: str = Field(title='title of the collection')
owner: str = Field(title='ETH address of the collection owner')
number_of_names: int = Field(title='total number of names in the collection')
number_of_labels: int = Field(title='total number of labels in the collection')
last_updated_timestamp: int = Field(title='timestamp in milliseconds of last collection update')
top_names: list[CollectionName] = Field(
title='top names stored in the collection (limited by `limit_names`)', description='can not be greater than 10')
top_labels: list[CollectionLabel] = Field(
title='top labels stored in the collection (limited by `limit_labels`)', description='can not be greater than 10')
types: list[str] = Field(title='list of types to which the collection belongs',
description='example of type is `human`')
avatar_emoji: str = Field(title='avatar emoji associated with this collection')
Expand Down Expand Up @@ -49,7 +49,7 @@ class BaseCollectionRequest(BaseModel):


class BaseCollectionSearchLimitOffsetSort(BaseCollectionRequest):
limit_names: int = Field(10, ge=0, le=10, title='the number of names returned in each collection',
limit_labels: int = Field(10, ge=0, le=10, title='the number of labels returned in each collection',
description='can not be greater than 10')
offset: int = Field(0,
title='offset of the first collection to return (used for pagination)',
Expand All @@ -64,9 +64,9 @@ class BaseCollectionSearch(BaseCollectionSearchLimitOffsetSort):
title='number of collections with the same type which are not penalized',
description='* set to null if you want to disable the penalization\n'
'* if the penalization algorithm is turned on then 3 times more results (than max_related_collections) are retrieved from Elasticsearch')
name_diversity_ratio: Optional[float] = Field(None, examples=[0.5], ge=0.0, le=1.0,
title='similarity value used for adding penalty to collections with similar names to other collections',
description='* if more than name_diversity_ratio % of the names have already been used, penalize the collection\n'
label_diversity_ratio: Optional[float] = Field(None, examples=[0.5], ge=0.0, le=1.0,
title='similarity value used for adding penalty to collections with similar labels to other collections',
description='* if more than label_diversity_ratio % of the labels have already been used, penalize the collection\n'
'* set to null if you want disable the penalization\n'
'* if the penalization algorithm is turned on then 3 times more results (than `max_related_collections`) '
'are retrieved from Elasticsearch'
Expand All @@ -86,13 +86,15 @@ class BaseCollectionSearchWithOther(BaseCollectionSearch): # instant search, do
'\nif not met, 422 status code is returned')

@field_validator('max_other_collections')
def max_other_between_min_other_and_max_total(cls, v: int, info: FieldValidationInfo) -> int:
@classmethod
def max_other_between_min_other_and_max_total(cls, v: int, info: ValidationInfo) -> int:
if 'min_other_collections' in info.data and info.data['min_other_collections'] > v:
raise ValueError('min_other_collections must not be greater than max_other_collections')
return v

@field_validator('max_total_collections')
def max_related_between_min_other_and_max_total(cls, v: int, info: FieldValidationInfo) -> int:
@classmethod
def max_related_between_min_other_and_max_total(cls, v: int, info: ValidationInfo) -> int:
if 'max_other_collections' in info.data and v < info.data['max_other_collections']:
raise ValueError('max_other_collections must not be greater than max_total_collections')
if 'min_other_collections' in info.data and 'max_related_collections' in info.data and \
Expand Down Expand Up @@ -129,18 +131,19 @@ class CollectionCountByStringRequest(BaseCollectionRequest):
pattern='^[^.]+$', examples=['zeus god'])
mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')


# ======== Collection Membership ========

class CollectionsContainingNameCountRequest(BaseCollectionRequest):
class CollectionsContainingLabelCountRequest(BaseCollectionRequest):
label: str = Field(title='label for which collection membership will be checked', examples=['zeus'])


class CollectionsContainingNameCountResponse(BaseCollectionQueryResponse):
class CollectionsContainingLabelCountResponse(BaseCollectionQueryResponse):
count: Union[int, str] = Field(
title='count of collections containing input label or `1000+` if more than 1000 results')


class CollectionsContainingNameRequest(BaseCollectionSearchLimitOffsetSort):
class CollectionsContainingLabelRequest(BaseCollectionSearchLimitOffsetSort):
label: str = Field(title='label for which membership will be checked for each collection', examples=['zeus'])
mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
max_results: int = Field(3, ge=0, title='max number of collections to return (for each page)',
Expand All @@ -150,9 +153,85 @@ class CollectionsContainingNameRequest(BaseCollectionSearchLimitOffsetSort):
'* if AI - use intelligent endpoint-specific ranking\n'
'* if Relevance - use relevance ranking')

class CollectionsContainingNameResponse(BaseCollectionQueryResponse):

class CollectionsContainingLabelResponse(BaseCollectionQueryResponse):
collections: list[Collection] = Field(title='list of public collections the provided label is a member of')


class GetCollectionByIdRequest(BaseCollectionRequest):
    # Request model that names the single collection to fetch.
    # `collection_id` has no default, so it is a required field.
    collection_id: str = Field(
        examples=['ri2QqxnAqZT7'],
        title='id of the collection to fetch',
    )


# ======== Suggestions from collections ========

class SuggestionFromCollection(BaseModel):
    # One suggested label taken from a collection, with its tokenization.

    # The suggested label itself (required).
    label: str = Field(title='label from a collection')

    # The label split into tokens (required).
    tokenized_label: list[str] = Field(title='suggested tokenization of label')

    # Optional generation details; defaults to None when not supplied.
    metadata: Optional[Metadata] = Field(
        default=None,
        description='if metadata=False this key is absent',
        title='information how suggestion was generated',
    )


class CollectionWithSuggestions(BaseModel):
    # Groups suggestions with the collection they came from and that
    # collection's related collections. Every field is required.

    suggestions: list[SuggestionFromCollection] = Field(
        title='suggestions from a collection',
    )
    collection_id: str = Field(
        title='id of the collection',
    )
    collection_title: str = Field(
        title='title of the collection',
    )
    collection_members_count: int = Field(
        title='number of members in the collection',
    )
    related_collections: list[RecursiveRelatedCollection] = Field(
        title='related collections to this collection',
    )


class SampleCollectionMembers(BaseModel):
    # Request model for sampling members from one collection.

    # Optional caller information; None when omitted.
    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )

    # Collection to sample from (required).
    collection_id: str = Field(
        examples=['qdeq7I9z0_jv'],
        title='id of the collection to sample from',
    )

    # Whether the response should carry metadata; on by default.
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )

    # Required upper bound on the sample size, validated to lie in [1, 100].
    max_sample_size: int = Field(
        ge=1,
        le=100,
        examples=[5],
        title='the maximum number of members to sample',
        description='if the collection has less members than max_sample_size, all the members will be returned',
    )

    # When the caller sends no seed, one is derived from the current
    # timestamp (truncated to whole seconds) at model construction time.
    seed: int = Field(
        title='seed for random number generator',
        description="if not provided (but can't be null), random seed will be generated",
        default_factory=lambda: int(datetime.now().timestamp()),
    )


class Top10CollectionMembersRequest(BaseModel):
    # Request model naming a collection to fetch labels from, with a
    # setting for the recursive related-collection lookup.

    # Optional caller information; None when omitted.
    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )

    # Collection whose labels are requested (required).
    collection_id: str = Field(
        examples=['ri2QqxnAqZT7'],
        title='id of the collection to fetch labels from',
    )

    # Whether the response should carry metadata; on by default.
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )

    # Defaults to 3 and is validated to lie in [0, 10].
    max_recursive_related_collections: int = Field(
        default=3,
        ge=0,
        le=10,
        title=(
            'Set to 0 to disable the "recursive related collection search". '
            'When set to a value between 1 and 10, for each related collection we find, '
            'we also do a (depth 1 recursive) lookup for this many related collections '
            'to the related collection.'
        ),
    )


class ScrambleCollectionTokens(BaseModel):
    # Request model for generating suggestions by scrambling the tokens of
    # a collection's members.

    # Optional caller information; None when omitted.
    user_info: Optional[UserInfo] = Field(
        default=None,
        title='information about user making request',
    )

    # Collection that supplies the tokens (required).
    collection_id: str = Field(
        examples=['3OB_f2vmyuyp'],
        title='id of the collection to take tokens from',
    )

    # Whether the response should carry metadata; on by default.
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )

    # Scrambling strategy; only the three literal values are accepted.
    method: Literal['left-right-shuffle', 'left-right-shuffle-with-unigrams', 'full-shuffle'] = Field(
        default='left-right-shuffle-with-unigrams',
        title='method used to scramble tokens and generate new suggestions',
        description=(
            '* left-right-shuffle - tokenize labels as bigrams and shuffle the right-side tokens (do not use unigrams)\n'
            '* left-right-shuffle-with-unigrams - same as above, but with some tokens swapped with unigrams\n'
            '* full-shuffle - shuffle all tokens from bigrams and unigrams and create random bigrams'
        ),
    )

    # Defaults to 25; must be at least 1.
    n_top_members: int = Field(
        default=25,
        ge=1,
        title="number of collection's top members to include in scrambling",
    )

    # Positive integer or null; defaults to 10.
    max_suggestions: Optional[PositiveInt] = Field(
        default=10,
        examples=[10],
        title='maximal number of suggestions to generate',
        description=(
            'must be a positive integer or null\n'
            '* number of generated suggestions will be `max_suggestions` or less '
            '(exactly `max_suggestions` if there are enough members)\n'
            '* if null, no tokens are repeated'
        ),
    )

    # When the caller sends no seed, one is derived from the current
    # timestamp (truncated to whole seconds) at model construction time.
    seed: int = Field(
        title='seed for random number generator',
        description="if not provided (but can't be null), random seed will be generated",
        default_factory=lambda: int(datetime.now().timestamp()),
    )


class FetchCollectionMembersRequest(BaseModel):
    # Request model for paging through the members of one collection.

    # Collection whose members are requested (required).
    collection_id: str = Field(
        examples=['ri2QqxnAqZT7'],
        title='id of the collection to fetch members from',
    )

    # Pagination start: defaults to 0 and may not be negative.
    offset: int = Field(
        default=0,
        ge=0,
        title='number of members to skip',
        description='used for pagination',
    )

    # Page size: defaults to 10 and must be at least 1 (no upper bound is declared here).
    limit: int = Field(
        default=10,
        ge=1,
        title='maximum number of members to return',
        description='used for pagination',
    )

    # Whether the response should carry metadata; on by default.
    metadata: bool = Field(
        default=True,
        title='return all the metadata in response',
    )
2 changes: 1 addition & 1 deletion conf/prod_config_new.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -172,5 +172,5 @@ collections:
other_collections_path: data/collections_data/other_collections.json
collections_limit: 3
suggestions_limit: 25 # per collection
name_diversity_ratio: 0.5
label_diversity_ratio: 0.5
max_per_type: 2
2 changes: 1 addition & 1 deletion conf/test_config_new.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -175,5 +175,5 @@ collections:
other_collections_path: data/collections_data/other_collections.json
collections_limit: 3
suggestions_limit: 25 # per collection
name_diversity_ratio: 0.5
label_diversity_ratio: 0.5
max_per_type: 2
Loading
Loading