Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Short Project Descriptions + Fetch Projects By Category #190

Merged
merged 5 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions web/components/ProjectModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ const ProjectModal = ({
contentStyles={{ padding: "p-4 sm:p-6 pt-0" }}>
{strategy && (
<div className='space-y-4'>
<div className='bg-indigo-50 p-3 rounded-xl space-y-3'>
<div className={clsx("text-sm leading-normal")}>
{strategy.project.short_description}
</div>
</div>
<div className='bg-indigo-50 p-3 rounded-xl space-y-3'>
<div
className='flex items-center justify-between pb-2 border-b border-indigo-600 group'>
Expand Down
2 changes: 2 additions & 0 deletions web/supabase/migrations/20240219173654_get_categories.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- View listing every distinct element found in the "categories" array column
-- of public.projects, one element per row, under the column name "category".
CREATE VIEW unique_categories_views AS SELECT DISTINCT unnest(categories) AS category
FROM "public"."projects";
12 changes: 12 additions & 0 deletions web/utils/ethereum/supportedErc20TokensByNetwork.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,5 +126,17 @@ export const supportedErc20TokensByNetwork: SupportedTokensInformation = {
decimals: 6,
name: "USDT"
}
},
Base: {
USDC: {
address: "0x833589fcd6edb6e08f4c7c32d4f71b54bda02913",
decimals: 6,
name: "USDC",
},
DAI: {
address: "0x50c5725949a6f0c72e6c4a641f24049a917db0cb",
decimals: 18,
name: "DAI",
}
}
};
70 changes: 37 additions & 33 deletions workers/fund_public_goods/db/tables/projects.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict
from typing import Any
from fund_public_goods.lib.strategy.models.answer import Answer
from supabase import PostgrestAPIResponse
from fund_public_goods.db.entities import Projects
Expand Down Expand Up @@ -66,39 +66,9 @@ def get(
logo=data["logo"]
)

def get_projects(range_from: int, range_to: int) -> PostgrestAPIResponse[Dict[str, Any]]:
    """Fetch one window of rows from the ``projects`` table.

    The query selects a fixed set of project columns together with the
    embedded ``applications`` rows, limits the result to the window passed
    to ``.range(range_from, range_to)``, and returns the executed response.
    """
    columns = "id, updated_at, title, description, website, keywords, categories, short_description, twitter, logo, applications(id, recipient, round, answers)"
    query = create_admin().table("projects").select(columns)
    return query.range(range_from, range_to).execute()

def get_all_projects() -> list[dict[str, Any]]:
    """Return every row of the projects table by paging through ``get_projects``.

    Pages of ``page_size`` rows are requested until a page comes back with
    fewer rows than requested, which signals that the table is exhausted.

    Returns:
        The concatenated ``.data`` of every page, in the order received.
    """
    all_results: list[dict[str, Any]] = []
    page_size = 999
    current_from = 0
    while True:
        # The bounds passed to ``.range`` are inclusive on both ends, so the
        # last index of a page holding ``page_size`` rows is
        # ``current_from + page_size - 1``. Using ``current_from + page_size``
        # would re-fetch the boundary row at the start of the next page.
        current_to = current_from + page_size - 1
        results = get_projects(current_from, current_to).data
        all_results.extend(results)

        # A short page means there is nothing left to fetch.
        if len(results) < page_size:
            break

        current_from += page_size

    return all_results

def fetch_projects_data() -> list[tuple[Projects, list[Answer]]]:
data = get_all_projects()

def sanitize_projects_information(projects: list[dict[str, Any]]) -> list[tuple[Projects, list[Answer]]]:
projects_with_answers: list[tuple[Projects, list[Answer]]] = []

for item in data:
for item in projects:
answers: list[Answer] = []

for application in item.get("applications", []):
Expand Down Expand Up @@ -127,3 +97,37 @@ def fetch_projects_data() -> list[tuple[Projects, list[Answer]]]:
projects_with_answers.append((project, answers))

return projects_with_answers


def get_unique_categories() -> list[str]:
    """Return the ``category`` value of every row in ``unique_categories_views``.

    An empty list is returned when the query yields no data.
    """
    response: PostgrestAPIResponse[list[dict[str, str]]] = (
        create_admin().table("unique_categories_views").select("*").execute()
    )
    rows = response.data
    if not rows:
        return []

    return [row["category"] for row in rows]  # type: ignore

def fetch_projects_by_category(categories: list[str]) -> list[tuple[Projects, list[Answer]]]:
    """Fetch the projects matching ``categories`` and sanitize them.

    The raw rows come from ``get_projects_from_description`` and are passed
    through ``sanitize_projects_information`` before being returned.
    """
    response = get_projects_from_description(categories)
    return sanitize_projects_information(response.data)

def get_projects_from_description(categories: list[str]):
    """Query the ``projects`` table filtered on its ``categories`` column.

    Despite the name, the filter is applied to ``categories`` (via ``.ov``),
    not to the description. Returns the executed query response.
    """
    # NOTE(review): ``.ov`` is presumably the array-overlap filter, i.e. rows
    # sharing at least one category with the argument -- confirm against the
    # client library documentation.
    columns = "id, updated_at, title, description, website, keywords, categories, short_description, twitter, logo, applications(id, recipient, round, answers)"
    projects_query = create_admin().table("projects").select(columns)
    return projects_query.ov("categories", categories).execute()
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@

{categories}

A user's prompt can match to more than one category. Be strict with assigning categories.
Return a max of {n} categories, you can reutrn less if the project really only matches less than {n} categories.

Respond strictly with a comma-separated list of categories, without quotes. Do not change the wording
or casing of the categories, return them exactly as they are written in the list above.

You must make sure that the categorization of the prompt is extensive enough so projects can be retrieved
based on these categories.

Prompt: {prompt}
"""


def categorize_prompt(prompt: str, n: int) -> list[str]:
def categorize_prompt(prompt: str, categories: list[str]) -> list[str]:
categorize_prompt = ChatPromptTemplate.from_messages([
("system", categorize_prompt_template),
])
Expand All @@ -29,8 +29,7 @@ def categorize_prompt(prompt: str, n: int) -> list[str]:

categories = [c.strip() for c in categorize_chain.invoke({
"prompt": prompt,
'n': n,
"categories": "\n".join(f"- {category}" for category in PROJECT_CATEGORIES)
"categories": "\n".join(f"- {category}" for category in categories)
}).split(',')]

return categories
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from fund_public_goods.db.entities import Projects
from fund_public_goods.db.tables.projects import fetch_projects_data
from fund_public_goods.db.tables.projects import fetch_projects_by_category, get_unique_categories
from fund_public_goods.lib.strategy.models.answer import Answer
from fund_public_goods.lib.strategy.utils.categorize_prompt import categorize_prompt
from fund_public_goods.lib.strategy.utils.get_top_matching_projects import get_top_matching_projects
from fund_public_goods.lib.strategy.utils.utils import get_latest_project_per_website
from fund_public_goods.workflows.egress_gitcoin.upsert import sanitize_url


def fetch_matching_projects(prompt: str) -> list[tuple[Projects, list[Answer]]]:
fetched_projects = fetch_projects_data()
prompt_categories = categorize_prompt(prompt, get_unique_categories())
fetched_projects = fetch_projects_by_category(prompt_categories)

answers_by_id = { project.id: answers for (project, answers) in fetched_projects }
projects = [project for (project, _) in fetched_projects]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def rerank_top_projects(prompt: str, projects: list[Projects]) -> list[Projects]
reranking_chain = reranking_prompt | llm | StrOutputParser()

separator = "\n-----\n"

top_ids_res = reranking_chain.invoke({
"prompt": prompt,
"separator": separator,
Expand Down Expand Up @@ -132,11 +132,7 @@ def filter_projects_by_categories(projects: list[Projects], categories: list[str

def get_top_matching_projects(prompt: str, projects: list[Projects]) -> list[Projects]:
projects_by_id = {project.id: project for project in projects}

prompt_categories = categorize_prompt(prompt, 2)
projects_with_categories = filter_projects_by_categories(projects, prompt_categories)

all_projects_collection = create_embeddings_collection(projects_with_categories)
all_projects_collection = create_embeddings_collection(projects)
matches = all_projects_collection.similarity_search(prompt, k=300)
matched_project_ids = [match.metadata["id"] for match in matches]
unique_ids = get_top_n_unique_ids({prompt: matched_project_ids}, 30)
Expand Down
Loading