Skip to content

Commit

Permalink
need to work on TODOs
Browse files Browse the repository at this point in the history
  • Loading branch information
amiicao committed Nov 27, 2024
1 parent ba57c63 commit 8964b59
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 25 deletions.
44 changes: 30 additions & 14 deletions src/pull_request_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import json
import logging
import ollama
from typing import List, Dict

import os
from config import OLLAMA_MODEL
from src.vector_db import query_by_function_names
from src.github_api import leave_comment
Expand All @@ -19,20 +20,21 @@ def generate_pr_feedback(

prompt = f"""
You are to provide feedback on a code base pull request. Follow the Instructions to provide your feedback on the Context below.
You are a Pull Request Criticiser.
Follow the Instructions below given the Context.
Start of Instructions:
1. Review the pull request's title, description, code diff, and relevant context from the codebase.
2. Using point-form, succinctly identify potential issues, code smells, code duplication, or downsides of
the pull request.
3. Consider interactions with the codebase and architectural design.
1. Read the pull request's title, description, code diff, and relevant context from the codebase.
2. Using point-form, concisely identify potential issues, code smells, code duplication, or downsides of
the pull request. Include file name in your references.
3. Consider interactions of the aforementioned code diff with the relevant context from codebase and architectural design.
4. Remember that in the code diff, '+' is code addition and '-' is code subtraction.
5. Do not provide refactored code as part of your feedback.
6. End your response with the sentence "Has the PR author considered these points?"
End of Instructions.
End of Instructions.
Start of Context:
Pull Request:
Expand All @@ -46,6 +48,8 @@ def generate_pr_feedback(
{pr_diff}
End of Context.
"""
logging.info("generating feedback...")
response = ollama.chat(
Expand All @@ -54,9 +58,23 @@ def generate_pr_feedback(

return response["message"]["content"]

def get_function_dependencies(changed_files: List[str]):

def get_function_dependencies(changed_files: List[str]) -> List[str]:
    """Collect the internal functions imported by the changed files.

    Each path in ``changed_files`` has its file extension stripped and is then
    looked up as a key in ``dependencies.json`` (opened relative to the current
    working directory). The values stored under the matching keys are merged
    into a single list.

    Args:
        changed_files: Paths of the files touched by the pull request.

    Returns:
        The de-duplicated dependency entries in first-seen order, so the
        result is stable between runs. Files with no entry in the graph
        contribute nothing; an empty input yields an empty list without
        reading the graph at all.

    Raises:
        FileNotFoundError: If ``dependencies.json`` does not exist.
        json.JSONDecodeError: If ``dependencies.json`` is not valid JSON.
    """
    # TODO: validate that each changed file is actually present on main.
    # TODO: modify the JSON file so that dots become slashes, here or in gen_deps.py.
    # TODO: add the project path to the function path.
    if not changed_files:
        return []

    # Strip the file extension before using the path as a graph key.
    module_paths = [os.path.splitext(path)[0] for path in changed_files]

    with open("dependencies.json", encoding="utf-8") as graph_file:
        dependency_graph = json.load(graph_file)

    # A dict is used as an ordered set: it removes duplicates like a set would
    # but keeps insertion order, making the returned list deterministic.
    ordered_dependencies = {}
    for module_path in module_paths:
        ordered_dependencies.update(
            dict.fromkeys(dependency_graph.get(module_path, ()))
        )
    return list(ordered_dependencies)


def handle_new_pull_request(
installation_id: str,
Expand All @@ -70,12 +88,10 @@ def handle_new_pull_request(
):
"""Handle new pull request webhook"""
try:

# todo: get all used function dependencies
dependency_functions = get_function_dependencies(changed_files)
dependency_function_code = query_by_function_names(dependency_functions, repo_id)
dependency_function_context = query_by_function_names(dependency_functions, repo_id)
# Generate feedback
feedback = generate_pr_feedback(dependency_function_code, pr_title, pr_body, pr_diff)
feedback = generate_pr_feedback(dependency_function_context, pr_title, pr_body, pr_diff)

# Post comment
leave_comment(installation_id, repo_full_name, pr_number, feedback)
Expand Down
23 changes: 14 additions & 9 deletions src/vector_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,20 @@ def embed_code_base(
functions = extract_function_info(file_path)

# Embed and store functions
for func in functions:
for func in functions: #
# Get relative path of function
# root_dir = os.path.dirname(os.path.abspath(__file__))
# relative_path = os.path.relpath(func['function_path'], root_dir)
# func["file_path"] = relative_path
# TODO: done remove up to root dir ; C:\Users\amy36\PycharmProjects\doppelganger\src\webhook_handler.py:handle_pull_requests
# Create embedding
embedding = model.encode(
f"{func['name']} {func['docstring']} {func['source_code']}"
f"{func['source_code']}"
).tolist()

# Add to ChromaDB
collection.add(
ids=[func['name']],
ids=[func["function_path"]],
embeddings=[embedding],
documents=[func["source_code"]],
metadatas=[{
Expand All @@ -144,7 +149,7 @@ def embed_code_base(
)


def query_by_function_names(functions, repo_id):
def query_by_function_names(function_paths, repo_id):
collection = get_collection_for_repo_branch(repo_id)
"""
Retrieve full code and metadata for a list of specific function name paths.
Expand All @@ -154,19 +159,19 @@ def query_by_function_names(functions, repo_id):
:return: List of dictionaries containing function details
"""
# Validate input
if not functions:
if not function_paths:
return []

# Retrieve functions from ChromaDB
results = collection.get(
ids=functions
)
results = collection.get(where={"function_path":{"$in": function_paths}}) # TODO: process function_paths to match something in the vectorDB

# Process and format results
function_details = []
# todo: make schema for metadata
# metadata:{'file_path': 'C:\\Users\\amy36\\PycharmProjects\\doppelganger\\src\\github_api.py', 'function_name': 'fetch_existing_issues', 'function_path': 'C:\\Users\\amy36\\PycharmProjects\\doppelganger\\src\\github_api.py:fetch_existing_issues'}
for i in range(len(results['ids'])):
function_info = {
'function_name': results['ids'][i],
# 'function_name': results['ids'][i],
'source_code': results['documents'][i] if results['documents'] else None,
'metadata': results['metadatas'][i] if results['metadatas'] else None
}
Expand Down
5 changes: 3 additions & 2 deletions src/webhook_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,14 @@ def handle_pull_requests(data, installation_id):

if action == "opened" or action == "edited":
temp_dir = None
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) #todo: why is ROOT_DIR "'C:\\Users\\amy36\\PycharmProjects\\doppelganger\\src"? it should not include /src
# try:
# # Update main branch collection if needed
# temp_dir = clone_repo_branch(installation_id, repo_full_name, "main")
# code_files = index_code_files(temp_dir)
# add_code_to_chroma(code_files, repo_id, "main")
embed_code_base(repo_id, ROOT_DIR) # TODO: ensure embeddings are updated after a pull request (look at actions)
embed_code_base(repo_id, ROOT_DIR) # TODO: 1. add embeddings when a repository is added; ensure root dir is from the repo of repo_id
# TODO: 2. ensure embeddings are updated after a pull request (look at actions) on main

# Handle the pull request
handle_new_pull_request(
Expand Down

0 comments on commit 8964b59

Please sign in to comment.