Skip to content

Commit 85ea146

Browse files
authored
feature: mem reader update and some bug fixes (#43)
* fix n4j cypher query * feat: add llm extra body * feat: update memory extraction prompt and result parser * fix: evaluation locomo search * ci: fix format and update test * feat: update result json parser
1 parent 97fdb06 commit 85ea146

File tree

8 files changed

+62
-22
lines changed

8 files changed

+62
-22
lines changed

src/memos/configs/llm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class OpenAILLMConfig(BaseLLMConfig):
2424
api_base: str = Field(
2525
default="https://api.openai.com/v1", description="Base URL for OpenAI API"
2626
)
27+
extra_body: Any = Field(default=None, description="extra body")
2728

2829

2930
class OllamaLLMConfig(BaseLLMConfig):

src/memos/configs/mem_reader.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ class BaseMemReaderConfig(BaseConfig):
2222
chunker: ChunkerConfigFactory = Field(
2323
..., description="Chunker configuration for the MemReader"
2424
)
25+
remove_prompt_example: bool = Field(
26+
default=False,
27+
description="whether remove example in memory extraction prompt to save token",
28+
)
2529

2630

2731
class SimpleStructMemReaderConfig(BaseMemReaderConfig):

src/memos/graph_dbs/neo4j.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,7 @@ def drop_database(self) -> None:
811811

812812
def _ensure_database_exists(self):
813813
with self.driver.session(database="system") as session:
814-
session.run(f"CREATE DATABASE {self.db_name} IF NOT EXISTS")
814+
session.run(f"CREATE DATABASE $db_name IF NOT EXISTS", db_name=self.db_name)
815815

816816
# Wait until the database is available
817817
for _ in range(10):

src/memos/llms/openai.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def generate(self, messages: MessageList) -> str:
2222
response = self.client.chat.completions.create(
2323
model=self.config.model_name_or_path,
2424
messages=messages,
25+
extra_body=self.config.extra_body,
2526
temperature=self.config.temperature,
2627
max_tokens=self.config.max_tokens,
2728
top_p=self.config.top_p,

src/memos/mem_reader/simple_struct.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import concurrent.futures
22
import copy
33
import json
4-
4+
import re
55
from abc import ABC
66
from typing import Any
77

@@ -17,6 +17,7 @@
1717
from memos.templates.mem_reader_prompts import (
1818
SIMPLE_STRUCT_DOC_READER_PROMPT,
1919
SIMPLE_STRUCT_MEM_READER_PROMPT,
20+
SIMPLE_STRUCT_MEM_READER_EXAMPLE,
2021
)
2122

2223

@@ -39,11 +40,11 @@ def __init__(self, config: SimpleStructMemReaderConfig):
3940
self.chunker = ChunkerFactory.from_config(config.chunker)
4041

4142
def _process_chat_data(self, scene_data_info, info):
42-
prompt = (
43-
SIMPLE_STRUCT_MEM_READER_PROMPT.replace("${user_a}", "user")
44-
.replace("${user_b}", "assistant")
45-
.replace("${conversation}", "\n".join(scene_data_info))
43+
prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace(
44+
"${conversation}", "\n".join(scene_data_info)
4645
)
46+
if self.config.remove_prompt_example:
47+
prompt = prompt.replace(SIMPLE_STRUCT_MEM_READER_EXAMPLE, "")
4748

4849
messages = [{"role": "user", "content": prompt}]
4950

@@ -228,7 +229,11 @@ def _process_doc_data(self, scene_data_info, info):
228229

229230
def parse_json_result(self, response_text):
230231
try:
231-
response_text = response_text.replace("```", "").replace("json", "")
232+
json_start = response_text.find("{")
233+
response_text = response_text[json_start:]
234+
response_text = response_text.replace("```", "").strip()
235+
if response_text[-1] != "}":
236+
response_text += "}"
232237
response_json = json.loads(response_text)
233238
return response_json
234239
except json.JSONDecodeError as e:

src/memos/templates/mem_reader_prompts.py

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,42 @@
1-
SIMPLE_STRUCT_MEM_READER_PROMPT = """
2-
You are a memory extraction expert.
1+
SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert.
32
4-
Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}.
3+
Your task is to extract memories from the perspective of user, based on a conversation between user and assistant. This means identifying what user would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as assistant) that impacted or were acknowledged by user.
54
65
Please perform:
7-
1. Identify information that reflects ${user_a}'s experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from ${user_b} that ${user_a} acknowledged or responded to.
6+
1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to.
87
2. Resolve all time, person, and event references clearly:
98
- Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible.
109
- Clearly distinguish between event time and message time.
1110
- If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”).
1211
- Include specific locations if mentioned.
1312
- Resolve all pronouns, aliases, and ambiguous references into full names or identities.
1413
- Disambiguate people with the same name if applicable.
15-
3. Always write from a third-person perspective, referring to ${user_a} as
14+
3. Always write from a third-person perspective, referring to user as
1615
"The user" or by name if name mentioned, rather than using first-person ("I", "me", "my").
1716
For example, write "The user felt exhausted..." instead of "I felt exhausted...".
18-
4. Do not omit any information that ${user_a} is likely to remember.
17+
4. Do not omit any information that user is likely to remember.
1918
- Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor.
2019
- Prioritize completeness and fidelity over conciseness.
21-
- Do not generalize or skip details that could be personally meaningful to ${user_a}.
20+
- Do not generalize or skip details that could be personally meaningful to user.
2221
2322
Return a single valid JSON object with the following structure:
2423
2524
{
2625
"memory list": [
2726
{
28-
"key": <string, a unique, concise memory title in English>,
27+
"key": <string, a unique, concise memory title>,
2928
"memory_type": <string, Either "LongTermMemory" or "UserMemory">,
3029
"value": <A detailed, self-contained, and unambiguous memory statement — written in English if the input conversation is in English, or in Chinese if the conversation is in Chinese>,
31-
"tags": <A list of relevant English thematic keywords (e.g.,
32-
["deadline", "team", "planning"])>
30+
"tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>
3331
},
3432
...
3533
],
36-
"summary": <a natural paragraph summarizing the above memories from ${user_a}'s perspective, 120–200 words, same language as the input>
34+
"summary": <a natural paragraph summarizing the above memories from user's perspective, 120–200 words, same language as the input>
3735
}
3836
3937
Language rules:
40-
- The `value` fields and `summary` must match the language of the input conversation.
41-
- All metadata fields (`key`, `memory_type`, `tags`) must be in English.
38+
- The `key`, `value`, `tags`, `summary` fields must match the language of the input conversation.
39+
- Keep `memory_type` in English.
4240
4341
Example:
4442
Conversation:
@@ -71,8 +69,7 @@
7169
Conversation:
7270
${conversation}
7371
74-
Your Output:
75-
"""
72+
Your Output:"""
7673

7774
SIMPLE_STRUCT_DOC_READER_PROMPT = """
7875
You are an expert text analyst for a search and retrieval system. Your task is to process a document chunk and generate a single, structured JSON object.
@@ -96,3 +93,33 @@
9693
9794
Produce ONLY the JSON object as your response.
9895
"""
96+
97+
SIMPLE_STRUCT_MEM_READER_EXAMPLE = """Example:
98+
Conversation:
99+
user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project.
100+
assistant: Oh Tom! Do you think the team can finish by December 15?
101+
user: [June 26, 2025 at 3:00 PM]: I’m worried. The backend won’t be done until
102+
December 10, so testing will be tight.
103+
assistant: [June 26, 2025 at 3:00 PM]: Maybe propose an extension?
104+
user: [June 26, 2025 at 4:21 PM]: Good idea. I’ll raise it in tomorrow’s 9:30 AM meeting—maybe shift the deadline to January 5.
105+
106+
Output:
107+
{
108+
"memory list": [
109+
{
110+
"key": "Initial project meeting",
111+
"memory_type": "LongTermMemory",
112+
"value": "On June 25, 2025 at 3:00 PM, Tom held a meeting with their team to discuss a new project. The conversation covered the timeline and raised concerns about the feasibility of the December 15, 2025 deadline.",
113+
"tags": ["project", "timeline", "meeting", "deadline"]
114+
},
115+
{
116+
"key": "Planned scope adjustment",
117+
"memory_type": "UserMemory",
118+
"value": "Tom planned to suggest in a meeting on June 27, 2025 at 9:30 AM that the team should prioritize features and propose shifting the project deadline to January 5, 2026.",
119+
"tags": ["planning", "deadline change", "feature prioritization"]
120+
},
121+
],
122+
"summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
123+
}
124+
125+
"""

tests/configs/test_llm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def test_openai_llm_config():
4747
"top_k",
4848
"api_base",
4949
"remove_think_prefix",
50+
"extra_body",
5051
],
5152
)
5253

tests/mem_reader/test_simple_structure.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def setUp(self):
1919
self.config.llm = MagicMock()
2020
self.config.embedder = MagicMock()
2121
self.config.chunker = MagicMock()
22+
self.config.remove_prompt_example = MagicMock()
2223

2324
# Mock dependencies
2425
with (

0 commit comments

Comments
 (0)