diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..e69de29 diff --git a/LICENSE b/LICENSE index 3f95d2a..b2f7a4d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Hrushikesh Dokala +Copyright (c) 2024 Hrushikesh Dokala Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/OSrecommender.py b/OSrecommender.py index c640e96..8bc8440 100644 --- a/OSrecommender.py +++ b/OSrecommender.py @@ -1,8 +1,8 @@ import streamlit as st -from user_data import get_repos -from db import recommend +from src.user_data import get_repos +from chroma.db import recommend import asyncio -from search import get_projects +from src.search import get_projects from linkpreview import link_preview @@ -18,7 +18,7 @@ def get_link_preview(url): if prompt: status_placeholder = st.empty() status_placeholder.text('Crawling your repositories...') - user_details, language_topics = get_repos(prompt) + user_details, language_topics = asyncio.run(get_repos(prompt)) status_placeholder.text('Crawling open source projects...') unique_repos = asyncio.run(get_projects(language_topics)) diff --git a/__pycache__/api.cpython-311.pyc b/__pycache__/api.cpython-311.pyc deleted file mode 100644 index 4b44378..0000000 Binary files a/__pycache__/api.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/db.cpython-311.pyc b/__pycache__/db.cpython-311.pyc deleted file mode 100644 index a6b18ac..0000000 Binary files a/__pycache__/db.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/search.cpython-311.pyc b/__pycache__/search.cpython-311.pyc deleted file mode 100644 index f277427..0000000 Binary files a/__pycache__/search.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/user_data.cpython-311.pyc b/__pycache__/user_data.cpython-311.pyc deleted file mode 100644 index 7877ef3..0000000 Binary files a/__pycache__/user_data.cpython-311.pyc and /dev/null differ diff --git a/api.py b/api.py deleted file mode 100644 index 2f6f0a1..0000000 --- a/api.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -@author : Hrushikesh Dokala -username : Hk669 -""" - - -from fastapi import FastAPI, HTTPException -import uvicorn -from pydantic import BaseModel -from user_data import get_repos -from search import get_projects -from db import recommend -import asyncio - -app = FastAPI() - -class Recommendation(BaseModel): - username:str - - -@app.post('/osrecommender') -async def get_recommendations(request : Recommendation): - username = request.username - - try: - user_details, language_topics = get_repos(username) - unique_repos =await get_projects(language_topics) - - urls = recommend(user_details,unique_repos) - return {'recommendations': urls} - except Exception as e: - print(e) - raise HTTPException(status_code=500, detail = 'Error generating recommendatoins') - - -async def main(): - uvicorn.run(app,host='127.0.0.1',port=8000) - - -if __name__ == '__main__': - asyncio.run(main()) \ No newline at end of file diff --git a/db.py b/db.py deleted file mode 100644 index 0298628..0000000 --- a/db.py +++ /dev/null @@ -1,41 +0,0 @@ -import chromadb -import random - -# Recommendations -def recommend(user_details, repos): - recommendations =[] - - # starting a database - client = chromadb.Client() - - collection = client.create_collection("repos_collection") - - # iterate through every project of the user - projects = list(repos.values()) - for project in projects: - document = f"{project['full_name']} : {project['description']}" - - # add the data to the DB - collection.add( - documents = [document], - ids = [project['full_name']], - ) - - for user_proj in user_details: - new_doc = f"{user_proj['project_name']} : {user_proj['description']}" - results = collection.query( - query_texts = [new_doc], - n_results = 4, - ) - try: - # recommending the repos in random - recommended_proj_id = random.choice(results['ids'][0]) - recommendations.append(f"https://www.github.com/{recommended_proj_id}") - - # if not found any repo "no repos found" - except IndexError: - print(f"No recommendations found for projects{user_proj['project_name']}") - continue - return recommendations - - diff --git a/search.py b/search.py deleted file mode 100644 index 1b43e77..0000000 --- a/search.py +++ /dev/null @@ -1,119 +0,0 @@ -import asyncio -from datetime import datetime -from aiohttp import ClientSession - -class Octokit: - def __init__(self, auth, session): - self.auth = auth - self.session = session - - - async def request(self, method, url, params=None): - headers = { - 'Authorization': 'BEARER ' + self.auth, - 'Accept': 'application/vnd.github+json', - } - - url = 'https://api.github.com' + url - - while True: - async with self.session.request(method, url, headers=headers, params=params) as response: - if response.status == 403: - reset_time = datetime.fromtimestamp(int(response.headers["X-RateLimit-Reset"])) - sleep_time = (reset_time - datetime.now()).total_seconds() + 5 - await asyncio.sleep(sleep_time) - else: - response.raise_for_status() - return await response.json() - -async def search_repositories(octokit, params): - response = await octokit.request('GET', '/search/repositories', params) - unique_repos = {} - - while len(response['items']) > 0 and params['page'] <= 10: - for item in response['items']: - if item['id'] not in unique_repos: - unique_repos[item['id']] = { - "full_name": item['full_name'], - "description": item['description'] - } - - params['page'] += 1 - response = await octokit.request('GET', '/search/repositories', params) - - return unique_repos - -# Define the main function -async def main(language_topics): - unique_repos = {} - - async with ClientSession() as session: - octokit = Octokit('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', session) - - languages = language_topics['languages'] - topics = language_topics['topics'] - - tasks = [] - - for language in languages: - base_params = { - 'q': f'stars:>=2000 forks:>=500 language:{language} pushed:>=2023-01-01', - 'sort': 'stars', - 'order': 'desc', - 'per_page': 100, - 'page': 1, - } - - help_wanted_params = base_params.copy() - help_wanted_params['q'] += ' help-wanted-issues:>=1' - tasks.append(asyncio.create_task(search_repositories(octokit, help_wanted_params))) - - good_first_issues_params = base_params.copy() - good_first_issues_params['q'] += ' good-first-issues:>=1' - tasks.append(asyncio.create_task(search_repositories(octokit, good_first_issues_params))) - - for topic in topics: - base_params = { - 'q': f'stars:>=2000 forks:>=500 topic:{topic} pushed:>=2023-01-01', - 'sort': 'stars', - 'order': 'desc', - 'per_page': 100, - 'page': 1, - } - - help_wanted_params = base_params.copy() - help_wanted_params['q'] += ' help-wanted-issues:>=1' - tasks.append(asyncio.create_task(search_repositories(octokit, help_wanted_params))) - - good_first_issues_params = base_params.copy() - good_first_issues_params['q'] += ' good-first-issues:>=1' - tasks.append(asyncio.create_task(search_repositories(octokit, good_first_issues_params))) - - results = await asyncio.gather(*tasks) - for result in results: - unique_repos.update(result) - - return unique_repos - - -async def get_projects(language_topics): - return await main(language_topics) - - - -def run_event_loop(): - language_topics = { - 'languages': ['python', 'javascript'], - 'topics': ['machine-learning', 'web-development'] - } - - loop = asyncio.get_event_loop() - result = loop.run_until_complete(get_projects(language_topics)) - loop.close() - - - for repo_id, repo_info in result.items(): - print(f"Repository ID: {repo_id}") - print(f"Full Name: {repo_info['full_name']}") - print(f"Description: {repo_info['description']}") - print("---------------------------") diff --git a/user_data.py b/user_data.py deleted file mode 100644 index 6739c1d..0000000 --- a/user_data.py +++ /dev/null @@ -1,50 +0,0 @@ -import requests - -#extracting the user repos data by sending requests with the Github Personal Access token -def get_repos(username): - #headers required to make requests to Github API - headers = { - 'Authorization': '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', #Enter your Github PAT - 'User-Agent': 'Open-Source-Repo-Recommender', - 'Accept': 'application/vnd.github.json', # Github mediatype, the format data is returned - } - - # To store the unique data of the user - languages_set = set() - topics_set = set() - user_details = [] - language_topics = {} - - - try: - # the request is sent to the below URL and returned data is stored in .json - url = f'https://api.github.com/users/{username}/repos' - repos = requests.get(url,headers=headers) - repos_data = repos.json() - - # iterates through every repository of the user data - for repo in repos_data: - - # It checks 1.Not Fork, 2.Description or Atleast one Language associated with the repository - if not repo['fork'] and (repo['description'] or repo['language'] or len(repo['topics'])>0): - - # If the conditioned is satisfied, makes a request for languages - languages = requests.get(repo['languages_url'],headers=headers) - languages_data = languages.json() - - # To store the user repositories name and the descripton - user_repo = { - 'project_name' : repo['name'], - 'description' : repo['description'], - } - user_details.append(user_repo) - - languages_set.update(languages_data.keys()) - topics_set.update(repo['topics']) - - language_topics = {"languages" : list(languages_set), "topics" : list(topics_set)} - - except Exception as e: - print(e) - - return user_details, language_topics \ No newline at end of file