Error when using ollama embedding #2415

Closed
DStarEpoch opened this issue Mar 21, 2025 · 2 comments

Comments


DStarEpoch commented Mar 21, 2025

🐛 Describe the bug

env:

mem0ai 0.1.73
ollama 0.4.7
openai 1.68.0

code:

import os

from openai import OpenAI
from mem0 import Memory

os.environ["OPENAI_API_KEY"] = "..."
openai_client = OpenAI()

config = {
    "llm": {
        "provider": "ollama",
        "config": {
            "model": "llama3.1:8b",
            "max_tokens": 4000,
            "ollama_base_url": "http://localhost:11434",
        }
    },
    "embedder": {
        "provider": "ollama",
        "config": {
            "model": "mxbai-embed-large:335m",
            "ollama_base_url": "http://localhost:11434"
        }
    }
}
memory = Memory.from_config(config)

def chat_with_memories(message: str, user_id: str = "default_user") -> str:
    # Retrieve relevant memories
    relevant_memories = memory.search(query=message, user_id=user_id, limit=3)
    memories_str = "\n".join(f"- {entry['memory']}" for entry in relevant_memories["results"])

    # Generate assistant response
    system_prompt = f"You are a helpful AI. Answer the question based on query and memories.\nUser Memories:\n{memories_str}"
    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": message}]
    response = openai_client.chat.completions.create(model="moonshot-v1-8k", messages=messages)
    assistant_response = response.choices[0].message.content

    # Create new memories from the conversation
    messages.append({"role": "assistant", "content": assistant_response})
    memory.add(messages, user_id=user_id)

    return assistant_response

def main():
    print("Chat with AI (type 'exit' to quit)")
    while True:
        user_input = input("You: ").strip()
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break
        print(f"AI: {chat_with_memories(user_input)}")

if __name__ == "__main__":
    main()

Running it produces this error:

Chat with AI (type 'exit' to quit)
You: lets play a card game

ValueError Traceback (most recent call last)
Cell In[5], line 11
8 print(f"AI: {chat_with_memories(user_input)}")
10 if __name__ == "__main__":
---> 11 main()

Cell In[5], line 8, in main()
6 print("Goodbye!")
7 break
----> 8 print(f"AI: {chat_with_memories(user_input)}")

Cell In[4], line 3, in chat_with_memories(message, user_id)
1 def chat_with_memories(message: str, user_id: str = "default_user") -> str:
2 # Retrieve relevant memories
----> 3 relevant_memories = memory.search(query=message, user_id=user_id, limit=3)
4 memories_str = "\n".join(f"- {entry['memory']}" for entry in relevant_memories["results"])
6 # Generate Assistant response

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/mem0/memory/main.py:462, in Memory.search(self, query, user_id, agent_id, run_id, limit, filters)
454 future_graph_entities = (
455 executor.submit(self.graph.search, query, filters, limit) if self.enable_graph else None
456 )
458 concurrent.futures.wait(
459 [future_memories, future_graph_entities] if future_graph_entities else [future_memories]
460 )
--> 462 original_memories = future_memories.result()
463 graph_entities = future_graph_entities.result() if future_graph_entities else None
465 if self.enable_graph:

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/concurrent/futures/_base.py:449, in Future.result(self, timeout)
447 raise CancelledError()
448 elif self._state == FINISHED:
--> 449 return self.__get_result()
451 self._condition.wait(timeout)
453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/concurrent/futures/_base.py:401, in Future.__get_result(self)
399 if self._exception:
400 try:
--> 401 raise self._exception
402 finally:
403 # Break a reference cycle with the exception in self._exception
404 self = None

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/concurrent/futures/thread.py:59, in _WorkItem.run(self)
56 return
58 try:
---> 59 result = self.fn(*self.args, **self.kwargs)
60 except BaseException as exc:
61 self.future.set_exception(exc)

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/mem0/memory/main.py:482, in Memory._search_vector_store(self, query, filters, limit)
480 def _search_vector_store(self, query, filters, limit):
481 embeddings = self.embedding_model.embed(query, "search")
--> 482 memories = self.vector_store.search(query=embeddings, limit=limit, filters=filters)
484 excluded_keys = {
485 "user_id",
486 "agent_id",
(...) 492 "id",
493 }
495 original_memories = [
496 {
497 **MemoryItem(
(...) 512 for mem in memories
513 ]

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/mem0/vector_stores/qdrant.py:143, in Qdrant.search(self, query, limit, filters)
131 """
132 Search for similar vectors.
133
(...) 140 list: Search results.
141 """
142 query_filter = self._create_filter(filters) if filters else None
--> 143 hits = self.client.query_points(
144 collection_name=self.collection_name,
145 query=query,
146 query_filter=query_filter,
147 limit=limit,
148 )
149 return hits.points

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/qdrant_client.py:558, in QdrantClient.query_points(self, collection_name, query, using, prefetch, query_filter, search_params, limit, offset, with_payload, with_vectors, score_threshold, lookup_from, consistency, shard_key_selector, timeout, **kwargs)
553 query = self._embed_models(query, is_query=True) if query is not None else None
554 prefetch = (
555 self._embed_models(prefetch, is_query=True) if prefetch is not None else None
556 )
--> 558 return self._client.query_points(
559 collection_name=collection_name,
560 query=query,
561 prefetch=prefetch,
562 query_filter=query_filter,
563 search_params=search_params,
564 limit=limit,
565 offset=offset,
566 with_payload=with_payload,
567 with_vectors=with_vectors,
568 score_threshold=score_threshold,
569 using=using,
570 lookup_from=lookup_from,
571 consistency=consistency,
572 shard_key_selector=shard_key_selector,
573 timeout=timeout,
574 **kwargs,
575 )

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/local/qdrant_local.py:466, in QdrantLocal.query_points(self, collection_name, query, using, prefetch, query_filter, search_params, limit, offset, with_payload, with_vectors, score_threshold, lookup_from, **kwargs)
463 query_filter = ignore_mentioned_ids_filter(query_filter, list(mentioned_ids))
465 prefetch = self._resolve_prefetches_input(prefetch, collection_name)
--> 466 return collection.query_points(
467 query=query,
468 prefetch=prefetch,
469 query_filter=query_filter,
470 using=using,
471 score_threshold=score_threshold,
472 limit=limit,
473 offset=offset or 0,
474 with_payload=with_payload,
475 with_vectors=with_vectors,
476 )

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/local/local_collection.py:703, in LocalCollection.query_points(self, query, prefetch, query_filter, limit, offset, with_payload, with_vectors, score_threshold, using, **kwargs)
690 scored_points = self._merge_sources(
691 sources=sources,
692 query=query,
(...) 699 score_threshold=score_threshold,
700 )
701 else:
702 # It is a base query
--> 703 scored_points = self._query_collection(
704 query=query,
705 using=using,
706 query_filter=query_filter,
707 limit=limit,
708 offset=offset,
709 with_payload=with_payload,
710 with_vectors=with_vectors,
711 score_threshold=score_threshold,
712 )
714 return types.QueryResponse(points=scored_points)

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/local/local_collection.py:840, in LocalCollection._query_collection(self, query, using, query_filter, limit, offset, with_payload, with_vectors, score_threshold)
838 return [record_to_scored_point(record) for record in records[offset:]]
839 elif isinstance(query, models.NearestQuery):
--> 840 return self.search(
841 query_vector=(using, query.nearest),
842 query_filter=query_filter,
843 limit=limit,
844 offset=offset,
845 with_payload=with_payload,
846 with_vectors=with_vectors,
847 score_threshold=score_threshold,
848 )
849 elif isinstance(query, models.RecommendQuery):
850 return self.recommend(
851 positive=query.recommend.positive,
852 negative=query.recommend.negative,
(...) 860 score_threshold=score_threshold,
861 )

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/local/local_collection.py:573, in LocalCollection.search(self, query_vector, query_filter, limit, offset, with_payload, with_vectors, score_threshold)
571 if isinstance(query_vector, np.ndarray):
572 if len(query_vector.shape) == 1:
--> 573 scores = calculate_distance(query_vector, vectors, distance)
574 else:
575 scores = calculate_multi_distance(query_vector, vectors, distance)

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/local/distances.py:152, in calculate_distance(query, vectors, distance_type)
149 assert not np.isnan(query).any(), "Query vector must not contain NaN"
151 if distance_type == models.Distance.COSINE:
--> 152 return cosine_similarity(query, vectors)
153 elif distance_type == models.Distance.DOT:
154 return dot_product(query, vectors)

File ~/software/miniconda3/envs/autogen0.4/lib/python3.12/site-packages/qdrant_client/local/distances.py:94, in cosine_similarity(query, vectors)
92 query_norm = np.linalg.norm(query)
93 query /= np.where(query_norm != 0.0, query_norm, EPSILON)
---> 94 return np.dot(vectors, query)
96 query_norm = np.linalg.norm(query, axis=-1)[:, np.newaxis]
97 query /= np.where(query_norm != 0.0, query_norm, EPSILON)

ValueError: shapes (0,1536) and (1024,) not aligned: 1536 (dim 1) != 1024 (dim 0)
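
Not part of the original report, but a quick way to confirm the mismatch in the last line (1536 expected by the store vs. 1024 produced by the model) is to ask Ollama for an embedding directly and check its length. A minimal sketch, assuming Ollama is serving on its default port and the model has already been pulled:

import requests

# Request a single embedding from the local Ollama server (assumed endpoint:
# /api/embeddings with "model" and "prompt" fields).
resp = requests.post(
    "http://localhost:11434/api/embeddings",
    json={"model": "mxbai-embed-large:335m", "prompt": "dimension check"},
    timeout=30,
)
resp.raise_for_status()

# mxbai-embed-large should report 1024 here, while mem0's default store
# expects 1536-dimensional vectors.
print(len(resp.json()["embedding"]))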

@parshvadaftari
Contributor

@DStarEpoch This is because mem0 defaults to 1536 embedding dimensions, while the embedding model used here produces 1024-dimensional vectors. Try adding the embedding_model_dims setting to your config and see if that helps.
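
For illustration only (this is not from the thread, and the author's actual fix below uses Milvus), a minimal sketch of that suggestion while keeping the default Qdrant store might look like this, assuming mem0 accepts embedding_dims on the embedder and embedding_model_dims on the vector store:

config = {
    # llm section unchanged from the original snippet
    "embedder": {
        "provider": "ollama",
        "config": {
            "model": "mxbai-embed-large:335m",
            "ollama_base_url": "http://localhost:11434",
            "embedding_dims": 1024,  # match the model's actual output size
        }
    },
    "vector_store": {
        "provider": "qdrant",
        "config": {
            "embedding_model_dims": 1024,  # create the collection with 1024-dim vectors
        },
    },
}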

@DStarEpoch
Author

@DStarEpoch This is because mem0 defaults to 1536 embedding dimensions, while the embedding model used here produces 1024-dimensional vectors. Try adding the embedding_model_dims setting to your config and see if that helps.

Thanks for your instruction. It has been solved.
I made the change like this:
"embedder": {
"provider": "ollama",
"config": {
"model": "mxbai-embed-large:335m",
"ollama_base_url": "http://localhost:11434",
"embedding_dims": 1024,
}
},
"vector_store": {
"provider": "milvus",
"config": {
"collection_name": "my_interaction",
"url": "http://127.0.0.1:19530",
"embedding_model_dims": 1024,
},
},

It works!
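
One caveat worth noting (not stated in the thread): a collection that was already created with the old 1536-dimension default keeps that shape, so the dimension error can persist until the old collection is dropped and recreated with the new setting. A hedged sketch for the Milvus setup above, using the collection name from that config:

from pymilvus import MilvusClient

# Connect to the same Milvus instance used in the config above and drop the
# stale collection so mem0 can recreate it with 1024-dim vectors.
client = MilvusClient(uri="http://127.0.0.1:19530")
if client.has_collection("my_interaction"):
    client.drop_collection("my_interaction")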
