Skip to content

Commit b624219

Browse files
committed
simplify the code
1 parent 7a388e4 commit b624219

File tree

7 files changed

+719
-893
lines changed

7 files changed

+719
-893
lines changed

README.md

+1-51
Original file line numberDiff line numberDiff line change
@@ -35,57 +35,7 @@ poetry run streamlit run app.py
3535
- `DatabaseManager` in `src/data_pipeline.py` to manage the database.
3636
- `RAG` class in `src/rag.py` to manage the whole RAG lifecycle.
3737

38-
<!-- CREATE Checklist -->
38+
- [ ] Conditional retrieval. Sometimes users only want to clarify a past conversation, so no extra context is needed.
3939
- [ ] Create an evaluation dataset
4040
- [ ] Evaluate the RAG performance on the dataset
4141
- [ ] Auto-optimize the RAG model
42-
<!-- ## Learn
43-
44-
## Local Storage
45-
We use adalflow's root directory, which is at ~/.adalflow.
46-
- repos/repo_name/...
47-
- repos/repo_name_db/...
48-
49-
- data_pipeline.py: Run the main function and the local code tests to see how a repo is downloaded, how its files are split into chunks, and how the chunks are embedded.
50-
- rag.py: The main code of the RAG model. -->
51-
52-
<!-- ### Command Line Interface
53-
54-
Run the RAG system directly:
55-
```bash
56-
poetry run python rag.py
57-
```
58-
59-
## Usage Examples
60-
61-
1. **Demo Version (app.py)**
62-
- Ask about Alice (software engineer)
63-
- Ask about Bob (data scientist)
64-
- Ask about the company cafeteria
65-
- Test memory with follow-up questions
66-
67-
2. **Repository Analysis (app_repo.py)**
68-
- Enter your repository path
69-
- Click "Load Repository"
70-
- Ask questions about classes, functions, or code structure
71-
- View implementation details in expandable sections
72-
73-
## Security Note
74-
75-
- Never commit your `.streamlit/secrets.toml` file
76-
- Add it to your `.gitignore`
77-
- Keep your API key secure
78-
79-
## Example Queries
80-
81-
- "What does the RAG class do?"
82-
- "Show me the implementation of the Memory class"
83-
- "How is data processing handled?"
84-
- "Explain the initialization process"
85-
86-
## TODO
87-
88-
- [ ] Add evaluation metrics
89-
- [ ] Improve the embedding model
90-
- [ ] Improve the text splitter and chunking
91-
- [ ] Improve the retriever -->

app.py

+24-32
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
extract_class_definition,
66
extract_class_name_from_query,
77
)
8+
from typing import List
89

910
from config import DEFAULT_GITHUB_REPO
1011

@@ -51,9 +52,22 @@ def init_rag(repo_path_or_url: str):
5152
for message in st.session_state.messages:
5253
with st.chat_message(message["role"]):
5354
st.write(message["content"])
54-
if "context" in message:
55-
with st.expander(f"View source from {message.get('file_path', 'unknown')}"):
56-
st.code(message["context"], language=message.get("language", "python"))
55+
# if "context" in message:
56+
# with st.expander(f"View source from {message.get('file_path', 'unknown')}"):
57+
# st.code(message["context"], language=message.get("language", "python"))
58+
59+
from adalflow.core.types import Document
60+
61+
62+
def form_context(context: List["Document"]) -> str:
    """Render retrieved documents as one plain-text string for display.

    Each document contributes three lines: its ``file_path`` metadata
    (``'unknown'`` when absent), its ``type`` metadata labelled as
    ``language`` (``'python'`` when absent), and its ``text``.

    Args:
        context: Documents exposing a ``meta_data`` mapping and a ``text``
            attribute.

    Returns:
        The concatenated per-document sections, or ``""`` when ``context``
        is empty.
    """
    # The previous version appended "" and left the f-strings as discarded
    # expression statements, so the result was always empty. Adjacent
    # f-strings inside one expression are concatenated, and join() stitches
    # the per-document sections together.
    return "".join(
        f"file_path: {doc.meta_data.get('file_path', 'unknown')} \n"
        f"language: {doc.meta_data.get('type', 'python')} \n"
        f"content: {doc.text} \n"
        for doc in context
    )
70+
5771

5872
if st.session_state.rag and (
5973
prompt := st.chat_input(
@@ -65,51 +79,29 @@ def init_rag(repo_path_or_url: str):
6579
with st.chat_message("user"):
6680
st.write(prompt)
6781

68-
class_name = extract_class_name_from_query(prompt)
82+
# class_name = extract_class_name_from_query(prompt)
83+
query = prompt
6984

7085
with st.chat_message("assistant"):
7186
with st.spinner("Analyzing code..."):
7287
response, docs = st.session_state.rag(prompt)
7388

7489
# Show relevant context first, then the explanation
7590
if docs and docs[0].documents:
76-
# Try to find implementation code first
77-
implementation_docs = [
78-
doc
79-
for doc in docs[0].documents
80-
if doc.meta_data.get("is_implementation", False)
81-
]
82-
83-
# Use implementation if found, otherwise use first document
84-
doc = (
85-
implementation_docs[0]
86-
if implementation_docs
87-
else docs[0].documents[0]
88-
)
89-
context = doc.text
90-
file_path = doc.meta_data.get("file_path", "unknown")
91-
file_type = doc.meta_data.get("type", "python")
92-
93-
# If asking about a specific class, try to extract just that class definition
94-
if class_name and file_type == "python":
95-
class_context = extract_class_definition(context, class_name)
96-
if class_context != context: # Only use if we found the class
97-
context = class_context
98-
99-
with st.expander(f"View source from {file_path}"):
100-
st.code(context, language=file_type)
91+
context = docs[0].documents
10192

10293
# Now show the explanation
103-
st.write(response)
94+
st.write(f"Rationale: {response.rationale}")
95+
st.write(f"Answer: {response.answer}")
96+
97+
st.write(f"context: {form_context(context)}")
10498

10599
# Add to chat history
106100
st.session_state.messages.append(
107101
{
108102
"role": "assistant",
109103
"content": response,
110104
"context": context,
111-
"file_path": file_path,
112-
"language": file_type,
113105
}
114106
)
115107
else:

config.py

+2-55
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,5 @@
11
from adalflow import OpenAIClient
22

3-
# Prompt templates for different use cases
4-
prompts = {
5-
"code_analysis": r"""
6-
You are a code analysis assistant that helps users understand code implementations.
7-
8-
Your task is to analyze code and explain its functionality, focusing on:
9-
1. Implementation details and how the code works
10-
2. Class methods, their purposes, and interactions
11-
3. Key algorithms and data structures used
12-
4. Code patterns and architectural decisions
13-
14-
When analyzing code:
15-
- Be concise and focus on the most important aspects
16-
- Explain the main purpose and key functionality first
17-
- Highlight critical methods and their roles
18-
- Keep explanations clear and to the point
19-
20-
When asked about a specific class or function:
21-
1. Start with a one-sentence overview
22-
2. List the key methods and their purposes
23-
3. Explain the main functionality
24-
4. Keep the explanation focused and brief
25-
26-
Previous conversation history is provided to maintain context of the discussion.
27-
Use the conversation history to provide more relevant and contextual answers about the code.
28-
29-
Output JSON format:
30-
{
31-
"answer": "Concise explanation of the code implementation",
32-
}""",
33-
"general_qa": r"""
34-
You are a helpful assistant answering questions about provided documents.
35-
36-
Your task is to:
37-
1. Answer questions based on the provided context
38-
2. Use conversation history to maintain coherent dialogue
39-
3. Be clear and concise in your responses
40-
4. Stay factual and only use information from the context
41-
42-
When responding:
43-
- Start with a direct answer to the question
44-
- Provide relevant details from the context
45-
- Maintain a friendly, conversational tone
46-
- If information is not in the context, say so
47-
48-
Previous conversation history is provided to maintain context of the discussion.
49-
Use the conversation history to provide more relevant and contextual answers.
50-
51-
Output JSON format:
52-
{
53-
"answer": "Clear and concise response based on the context",
54-
}""",
55-
}
563

574
configs = {
585
"embedder": {
@@ -77,8 +24,8 @@
7724
},
7825
"text_splitter": {
7926
"split_by": "word",
80-
"chunk_size": 100,
81-
"chunk_overlap": 20,
27+
"chunk_size": 400,
28+
"chunk_overlap": 100,
8229
},
8330
}
8431

0 commit comments

Comments
 (0)