Commit 4e3f77b
feat: add singleton for memos and llm
1 parent: c7df5ad

10 files changed (+466 −195 lines)

src/memos/api/config.py
Lines changed: 2 additions & 2 deletions

@@ -46,9 +46,9 @@ def get_activation_config() -> Dict[str, Any]:
             "config": {
                 "memory_filename": "activation_memory.pickle",
                 "extractor_llm": {
-                    "backend": "huggingface",
+                    "backend": "huggingface_singleton",
                     "config": {
-                        "model_name_or_path": "Qwen/Qwen3-1.7B",
+                        "model_name_or_path": os.getenv("MOS_CHAT_MODEL", "Qwen/Qwen3-1.7B"),
                         "temperature": 0.8,
                         "max_tokens": 1024,
                         "top_p": 0.9,

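With this change the activation-memory extractor switches to the singleton backend and resolves its model from the MOS_CHAT_MODEL environment variable, falling back to Qwen/Qwen3-1.7B. A minimal sketch of the override, assuming the nested dictionary shape shown in the hunk above (the exact return structure of get_activation_config may differ):

import os

os.environ["MOS_CHAT_MODEL"] = "Qwen/Qwen3-4B"  # hypothetical override, set before the config is built

from memos.api.config import get_activation_config

activation = get_activation_config()
extractor = activation["config"]["extractor_llm"]
print(extractor["backend"])                       # "huggingface_singleton"
print(extractor["config"]["model_name_or_path"])  # "Qwen/Qwen3-4B"
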
src/memos/api/product_models.py
Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ class UserRegisterRequest(BaseRequest):
 class GetMemoryRequest(BaseRequest):
     """Request model for getting memories."""
     user_id: str = Field(..., description="User ID")
-    memory_type: Literal["text_mem", "act_mem", "param_mem"] = Field(..., description="Memory type")
+    memory_type: Literal["text_mem", "act_mem", "param_mem", "para_mem"] = Field(..., description="Memory type")
     mem_cube_ids: list[str] | None = Field(None, description="Cube IDs")
     search_query: str | None = Field(None, description="Search query")

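GetMemoryRequest now also accepts "para_mem" alongside the existing literals. A quick validation sketch (field values are invented):

from memos.api.product_models import GetMemoryRequest

req = GetMemoryRequest(user_id="user-123", memory_type="para_mem")  # accepted after this change
print(req.memory_type)
# Any value outside the listed literals would raise a pydantic ValidationError.
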
src/memos/api/routers/product_router.py
Lines changed: 6 additions & 6 deletions

@@ -27,7 +27,7 @@ def get_mos_product_instance():
     from memos.configs.mem_os import MOSConfig
     mos_config = MOSConfig(**default_config)
     MOS_PRODUCT_INSTANCE = MOSProduct(default_config=mos_config)
-    logger.info("MOSProduct instance created successfully")
+    logger.info("MOSProduct instance created successfully with inheritance architecture")
     return MOS_PRODUCT_INSTANCE

 get_mos_product_instance()

@@ -265,24 +265,24 @@ async def update_user_config(user_id: str, config_data: dict):
         raise HTTPException(status_code=500, detail=str(traceback.format_exc()))


-@router.get("/instances/status", summary="Get user instance status", response_model=BaseResponse[dict])
+@router.get("/instances/status", summary="Get user configuration status", response_model=BaseResponse[dict])
 async def get_instance_status():
-    """Get information about active user instances in memory."""
+    """Get information about active user configurations in memory."""
     try:
         mos_product = get_mos_product_instance()
         status_info = mos_product.get_user_instance_info()
         return BaseResponse(
-            message="Instance status retrieved successfully",
+            message="User configuration status retrieved successfully",
             data=status_info
         )
     except Exception as e:
-        logger.error(f"Failed to get instance status: {traceback.format_exc()}")
+        logger.error(f"Failed to get user configuration status: {traceback.format_exc()}")
         raise HTTPException(status_code=500, detail=str(traceback.format_exc()))


 @router.get("/instances/count", summary="Get active user count", response_model=BaseResponse[int])
 async def get_active_user_count():
-    """Get the number of active user instances in memory."""
+    """Get the number of active user configurations in memory."""
     try:
         mos_product = get_mos_product_instance()
         count = mos_product.get_active_user_count()

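The /instances/status and /instances/count routes keep their paths; only summaries, docstrings, and log/response messages now speak of user configurations rather than instances. A hedged call sketch, assuming the API is served locally (base URL and any route prefix are guesses):

import requests

BASE_URL = "http://localhost:8000"  # hypothetical deployment
status = requests.get(f"{BASE_URL}/instances/status").json()
count = requests.get(f"{BASE_URL}/instances/count").json()
print(status["message"])  # "User configuration status retrieved successfully"
print(count["data"])      # number of active user configurations
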
src/memos/configs/llm.py
Lines changed: 1 addition & 0 deletions

@@ -54,6 +54,7 @@ class LLMConfigFactory(BaseConfig):
         "openai": OpenAILLMConfig,
         "ollama": OllamaLLMConfig,
         "huggingface": HFLLMConfig,
+        "huggingface_singleton": HFLLMConfig,  # Add singleton support
     }

     @field_validator("backend")

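The new backend key maps to the existing HFLLMConfig schema, so switching a config to the singleton path is just a string change. A sketch, with config fields borrowed from the api/config.py hunk above:

from memos.configs.llm import LLMConfigFactory

llm_config = LLMConfigFactory(
    backend="huggingface_singleton",  # validated against the mapping added here
    config={
        "model_name_or_path": "Qwen/Qwen3-1.7B",
        "temperature": 0.8,
        "max_tokens": 1024,
        "top_p": 0.9,
    },
)
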
src/memos/configs/memory.py
Lines changed: 4 additions & 4 deletions

@@ -51,9 +51,9 @@ class KVCacheMemoryConfig(BaseActMemoryConfig):
     @classmethod
     def validate_extractor_llm(cls, extractor_llm: LLMConfigFactory) -> LLMConfigFactory:
         """Validate the extractor_llm field."""
-        if extractor_llm.backend != "huggingface":
+        if extractor_llm.backend not in ["huggingface", "huggingface_singleton"]:
             raise ConfigurationError(
-                f"KVCacheMemoryConfig requires extractor_llm backend to be 'huggingface', got '{extractor_llm.backend}'"
+                f"KVCacheMemoryConfig requires extractor_llm backend to be 'huggingface' or 'huggingface_singleton', got '{extractor_llm.backend}'"
             )
         return extractor_llm

@@ -83,9 +83,9 @@ class LoRAMemoryConfig(BaseParaMemoryConfig):
     @classmethod
     def validate_extractor_llm(cls, extractor_llm: LLMConfigFactory) -> LLMConfigFactory:
         """Validate the extractor_llm field."""
-        if extractor_llm.backend not in ["huggingface"]:
+        if extractor_llm.backend not in ["huggingface", "huggingface_singleton"]:
             raise ConfigurationError(
-                f"LoRAMemoryConfig requires extractor_llm backend to be 'huggingface', got '{extractor_llm.backend}'"
+                f"LoRAMemoryConfig requires extractor_llm backend to be 'huggingface' or 'huggingface_singleton', got '{extractor_llm.backend}'"
             )
         return extractor_llm

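Both validators now treat the two HF backends as interchangeable. A sketch that calls one validator directly for illustration (pydantic normally invokes it during model construction):

from memos.configs.llm import LLMConfigFactory
from memos.configs.memory import KVCacheMemoryConfig

extractor = LLMConfigFactory(
    backend="huggingface_singleton",
    config={"model_name_or_path": "Qwen/Qwen3-1.7B"},
)
# Before this commit the check required backend == "huggingface" and would raise ConfigurationError here.
KVCacheMemoryConfig.validate_extractor_llm(extractor)
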
src/memos/llms/factory.py
Lines changed: 2 additions & 0 deletions

@@ -3,6 +3,7 @@
 from memos.configs.llm import LLMConfigFactory
 from memos.llms.base import BaseLLM
 from memos.llms.hf import HFLLM
+from memos.llms.hf_singleton import HFSingletonLLM
 from memos.llms.ollama import OllamaLLM
 from memos.llms.openai import OpenAILLM

@@ -14,6 +15,7 @@ class LLMFactory(BaseLLM):
         "openai": OpenAILLM,
         "ollama": OllamaLLM,
         "huggingface": HFLLM,
+        "huggingface_singleton": HFSingletonLLM,  # Add singleton version
     }

     @classmethod

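With the mapping in place, requesting the huggingface_singleton backend through the factory yields an HFSingletonLLM, so repeated construction with the same model path reuses one loaded model. A sketch, assuming the factory's entry point mirrors its existing backends (the exact classmethod name is not shown in this hunk and is a guess):

from memos.configs.llm import LLMConfigFactory
from memos.llms.factory import LLMFactory

config = LLMConfigFactory(
    backend="huggingface_singleton",
    config={"model_name_or_path": "Qwen/Qwen3-1.7B"},
)
llm_a = LLMFactory.from_config(config)  # hypothetical entry point; loads the model once
llm_b = LLMFactory.from_config(config)  # returns the cached singleton
assert llm_a is llm_b
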
src/memos/llms/hf_singleton.py (new file)
Lines changed: 121 additions & 0 deletions

import threading
from typing import Dict, Optional

from memos.configs.llm import HFLLMConfig
from memos.llms.hf import HFLLM
from memos.log import get_logger

logger = get_logger(__name__)


class HFSingletonLLM(HFLLM):
    """
    Singleton version of HFLLM that prevents multiple loading of the same model.
    This class inherits from HFLLM and adds singleton behavior.
    """

    _instances: Dict[str, 'HFSingletonLLM'] = {}
    _lock = threading.Lock()

    def __new__(cls, config: HFLLMConfig):
        """
        Singleton pattern implementation.
        Returns existing instance if config already exists, otherwise creates new one.
        """
        config_key = cls._get_config_key(config)

        if config_key in cls._instances:
            logger.debug(f"Reusing existing HF model: {config.model_name_or_path}")
            return cls._instances[config_key]

        with cls._lock:
            # Double-check pattern to prevent race conditions
            if config_key in cls._instances:
                logger.debug(f"Reusing existing HF model: {config.model_name_or_path}")
                return cls._instances[config_key]

            logger.info(f"Creating new HF model: {config.model_name_or_path}")
            instance = super().__new__(cls)
            cls._instances[config_key] = instance
            return instance

    def __init__(self, config: HFLLMConfig):
        """
        Initialize the singleton HFLLM instance.
        Only initializes if this is a new instance.
        """
        # Check if already initialized
        if hasattr(self, '_initialized'):
            return

        # Call parent constructor
        super().__init__(config)
        self._initialized = True

    @classmethod
    def _get_config_key(cls, config: HFLLMConfig) -> str:
        """
        Generate a unique key for the HF model configuration.

        Args:
            config: The HFLLM configuration

        Returns:
            A unique string key representing the configuration
        """
        # Create a unique key based on model path and key parameters
        # str(config.temperature),
        # str(config.max_tokens),
        # str(config.top_p),
        # str(config.top_k),
        # str(config.add_generation_prompt),
        # str(config.remove_think_prefix),
        # str(config.do_sample)
        key_parts = [
            config.model_name_or_path
        ]
        return "|".join(key_parts)

    @classmethod
    def get_instance_count(cls) -> int:
        """
        Get the number of unique HF model instances currently managed.

        Returns:
            Number of HF model instances
        """
        return len(cls._instances)

    @classmethod
    def get_instance_info(cls) -> Dict[str, str]:
        """
        Get information about all managed HF model instances.

        Returns:
            Dictionary mapping config keys to model paths
        """
        return {key: instance.config.model_name_or_path
                for key, instance in cls._instances.items()}

    @classmethod
    def clear_all(cls) -> None:
        """
        Clear all HF model instances from memory.
        This should be used carefully as it will force reloading of models.
        """
        with cls._lock:
            cls._instances.clear()
            logger.info("All HF model instances cleared from singleton manager")


# Convenience function to get singleton manager info
def get_hf_singleton_info() -> Dict[str, int]:
    """
    Get information about the HF singleton manager.

    Returns:
        Dictionary with instance count and info
    """
    return {
        "instance_count": HFSingletonLLM.get_instance_count(),
        "instance_info": HFSingletonLLM.get_instance_info()
    }

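Because _get_config_key uses only model_name_or_path, two configs that differ solely in generation parameters map to one loaded model; the second __init__ returns early via the _initialized guard, so the first config's settings win. A minimal usage sketch, assuming HFLLMConfig accepts these keyword fields:

from memos.configs.llm import HFLLMConfig
from memos.llms.hf_singleton import HFSingletonLLM, get_hf_singleton_info

cfg_a = HFLLMConfig(model_name_or_path="Qwen/Qwen3-1.7B", temperature=0.8)
cfg_b = HFLLMConfig(model_name_or_path="Qwen/Qwen3-1.7B", temperature=0.1)

llm_a = HFSingletonLLM(cfg_a)  # loads the model
llm_b = HFSingletonLLM(cfg_b)  # same object; the differing temperature is ignored by the key

assert llm_a is llm_b
print(get_hf_singleton_info())  # roughly: {"instance_count": 1, "instance_info": {"Qwen/Qwen3-1.7B": "Qwen/Qwen3-1.7B"}}
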
src/memos/mem_os/core.py
Lines changed: 0 additions & 10 deletions

@@ -477,16 +477,6 @@ def search(
             logger.info(
                 f"🧠 [Memory] Searched memories from {mem_cube_id}:\n{self._str_memories(memories)}\n"
             )
-            if (
-                (mem_cube_id in install_cube_ids)
-                and (mem_cube.act_mem is not None)
-                and self.config.enable_activation_memory
-            ):
-                memories = mem_cube.act_mem.extract(query)
-                result["act_mem"].append({"cube_id": mem_cube_id, "memories": [memories]})
-                logger.info(
-                    f"🧠 [Memory] Searched memories from {mem_cube_id}:\n{self._str_memories(memories)}\n"
-                )
         return result

     def add(
