aws
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎python/README.md‎
Lines changed: 44 additions & 4 deletions b/‎python/README.md‎
Lines changed: 44 additions & 4 deletions
diff --git a/‎python/model_hosting_container_standards/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎python/model_hosting_container_standards/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎python/model_hosting_container_standards/common/handler/resolver.py‎
Lines changed: 5 additions & 5 deletions b/‎python/model_hosting_container_standards/common/handler/resolver.py‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎python/model_hosting_container_standards/sagemaker/__init__.py‎
Lines changed: 38 additions & 9 deletions b/‎python/model_hosting_container_standards/sagemaker/__init__.py‎
Lines changed: 38 additions & 9 deletions
diff --git a/‎python/model_hosting_container_standards/sagemaker/handler_resolver.py‎
Lines changed: 3 additions & 3 deletions b/‎python/model_hosting_container_standards/sagemaker/handler_resolver.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎python/model_hosting_container_standards/sagemaker/lora/FACTORY_USAGE.md‎
Lines changed: 39 additions & 21 deletions b/‎python/model_hosting_container_standards/sagemaker/lora/FACTORY_USAGE.md‎
Lines changed: 39 additions & 21 deletions
@@ -21,7 +21,8 @@ ModelHostingContainerStandards/
 │   │   │   ├── custom_code_ref_resolver/  # Dynamic code loading
 │   │   │   └── handler/       # Handler specifications
 │   │   └── sagemaker/         # SageMaker integration
-│   │       └── lora/          # LoRA adapter support
+│   │       ├── lora/          # LoRA adapter support
+│   │       └── sessions/      # Stateful session management
 │   ├── tests/                 # Package tests
 │   ├── examples/              # Python examples and demos
 │   ├── pyproject.toml         # Python project configuration
 
@@ -98,7 +98,7 @@ async def invocations(request: Request) -> dict:
 
 # Optional: Add LoRA adapter support
 @sagemaker_standards.register_invocation_handler
-@sagemaker_standards.inject_adapter_id("model")
+@sagemaker_standards.inject_adapter_id("model")  # Replace mode (default)
 async def invocations_with_lora(request: Request) -> dict:
     """Invocation handler with LoRA adapter ID injection."""
     body = await request.json()
@@ -180,7 +180,8 @@ The system automatically resolves handlers in this order:
 @sagemaker_standards.register_invocation_handler
 
 # LoRA adapter support
-@sagemaker_standards.inject_adapter_id("model")
+@sagemaker_standards.inject_adapter_id("model")  # Replace mode (default)
+@sagemaker_standards.inject_adapter_id("model", append=True, separator=":")  # Append mode
 ```
 
 ### Customer Decorators (for model customization)
@@ -193,6 +194,10 @@ The system automatically resolves handlers in this order:
 # LoRA transform decorators
 @sagemaker_standards.register_load_adapter_handler(request_shape={...}, response_shape={...})
 @sagemaker_standards.register_unload_adapter_handler(request_shape={...}, response_shape={...})
+
+# LoRA adapter injection modes
+@sagemaker_standards.inject_adapter_id("model")  # Replace mode (default)
+@sagemaker_standards.inject_adapter_id("model", append=True, separator=":")  # Append mode
 ```
 
 ## Framework Examples
@@ -209,15 +214,24 @@ import json
 
 # Create router like real vLLM does
 router = APIRouter()
-
+@router.post("/ping", response_class=Response)
+@router.get("/ping", response_class=Response)
 @sagemaker_standards.register_ping_handler
 async def ping(raw_request: Request) -> Response:
     """Default vLLM ping handler with automatic routing."""
     return Response(
         content='{"status": "healthy", "source": "vllm_default", "message": "Default ping from vLLM server"}',
         media_type="application/json",
     )
-
+@router.post(
+        "/invocations",
+        dependencies=[Depends(validate_json_request)],
+        responses={
+            HTTPStatus.BAD_REQUEST.value: {"model": ErrorResponse},
+            HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value: {"model": ErrorResponse},
+            HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
+        },
+    )
 @sagemaker_standards.register_invocation_handler
 @sagemaker_standards.inject_adapter_id("model")
 async def invocations(raw_request: Request) -> Response:
@@ -247,6 +261,32 @@ async def invocations(raw_request: Request) -> Response:
         media_type="application/json",
     )
 
+# Alternative: append mode for model field
+@sagemaker_standards.register_invocation_handler
+@sagemaker_standards.inject_adapter_id("model", append=True, separator=":")
+async def invocations_append_mode(raw_request: Request) -> Response:
+    """vLLM invocation handler with adapter ID appending."""
+    body_bytes = await raw_request.body()
+    try:
+        body = json.loads(body_bytes.decode()) if body_bytes else {}
+    except (json.JSONDecodeError, UnicodeDecodeError):
+        body = {}
+
+    # If body has {"model": "Qwen-7B"} and header has "my-lora"
+    # Result will be {"model": "Qwen-7B:my-lora"}
+    model_with_adapter = body.get("model", "base-model")
+
+    response_data = {
+        "predictions": ["Generated text from vLLM"],
+        "model_used": model_with_adapter,
+        "message": f"Response using model: {model_with_adapter}",
+    }
+
+    return Response(
+        content=json.dumps(response_data),
+        media_type="application/json",
+    )
+
 # Setup FastAPI app like real vLLM
 app = FastAPI(title="vLLM Server", version="1.0.0")
 app.include_router(router)
 
@@ -5,4 +5,4 @@
 - FastAPI: from .common.fastapi import EnvVars, ENV_CONFIG
 """
 
-__version__ = "0.1.2"
+__version__ = "0.1.4"
@@ -220,11 +220,11 @@ def resolve_handler(self, handler_type: str) -> Optional[Callable]:
             if handler:
                 return handler
 
-        # No handler found anywhere, use the framework default
-        handler = self.registry.get_framework_default(handler_type)
-        if handler:
-            logger.info(f"Use {handler_type} handler registered in framework")
-            return handler
+        # No handler found anywhere: the framework-default fallback below is disabled, so fall through and return None
+        # handler = self.registry.get_framework_default(handler_type)
+        # if handler:
+        #     logger.info(f"Use {handler_type} handler registered in framework")
+        #     return handler
 
         logger.debug(f"No {handler_type} handler found anywhere")
         return None
@@ -1,6 +1,6 @@
 """SageMaker integration decorators."""
 
-from typing import List, Optional
+from typing import Dict, List, Optional, Union
 
 from fastapi import FastAPI
 
@@ -18,6 +18,7 @@
     SageMakerLoRAApiHeader,
     create_lora_transform_decorator,
 )
+from .lora.models import AppendOperation
 from .sagemaker_loader import SageMakerFunctionLoader
 from .sagemaker_router import create_sagemaker_router
 from .sessions import create_session_transform_decorator
@@ -52,7 +53,9 @@ def register_unload_adapter_handler(
     )
 
 
-def inject_adapter_id(adapter_path: str):
+def inject_adapter_id(
+    adapter_path: str, append: bool = False, separator: Optional[str] = None
+):
     """Create a decorator that injects adapter ID from SageMaker headers into request body.
 
     This decorator extracts the adapter identifier from the SageMaker LoRA API header
@@ -63,27 +66,53 @@ def inject_adapter_id(adapter_path: str):
         adapter_path: The JSON path where the adapter ID should be injected in the
                      request body (e.g., "model", "body.model.lora_name", etc.).
                      Supports both simple keys and nested paths using dot notation.
+        append: If True, appends the adapter ID to the existing value at adapter_path
+                using the specified separator. If False (default), replaces the value.
+                When True, separator parameter is required.
+                Example with append=True and separator=":":
+                    {"model": "base-model"} -> {"model": "base-model:adapter-123"}
+        separator: The separator to use when append=True. Required when append=True.
+                  Common values include ":", "-", "_", etc.
 
     Returns:
         A decorator function that can be applied to FastAPI route handlers to
         automatically inject adapter IDs from headers into the request body.
 
+    Raises:
+        ValueError: If adapter_path is empty or not a string, if append=True without
+                   a separator, or if a non-empty separator is given with append=False.
+
     Note:
         This is a transform-only decorator that does not create its own route.
         It must be applied to existing route handlers.
     """
     # validate and preprocess
     if not adapter_path:
-        logger.exception("adapter_path cannot be empty")
+        logger.error("adapter_path cannot be empty")
         raise ValueError("adapter_path cannot be empty")
     if not isinstance(adapter_path, str):
-        logger.exception("adapter_path must be a string")
+        logger.error("adapter_path must be a string")
         raise ValueError("adapter_path must be a string")
-    # create request_shape
-    request_shape = {}
-    request_shape[adapter_path] = (
-        f'headers."{SageMakerLoRAApiHeader.ADAPTER_IDENTIFIER}"'
-    )
+    if append and separator is None:
+        logger.error(f"separator must be provided when {append=}")
+        raise ValueError(f"separator must be provided when {append=}")
+    if separator and not append:
+        logger.error(f"separator is specified {separator} but {append=}")
+        raise ValueError(f"separator is specified {separator} but {append=}")
+
+    # create request_shape with operation encoding
+    request_shape: Dict[str, Union[str, AppendOperation]] = {}
+    header_expr = f'headers."{SageMakerLoRAApiHeader.ADAPTER_IDENTIFIER}"'
+
+    if append:
+        # Encode append operation as a Pydantic model
+        request_shape[adapter_path] = AppendOperation(
+            separator=separator, expression=header_expr
+        )
+    else:
+        # Default replace behavior (backward compatible)
+        request_shape[adapter_path] = header_expr
+
     return create_lora_transform_decorator(LoRAHandlerType.INJECT_ADAPTER_ID)(
         request_shape=request_shape, response_shape=None
     )
 
@@ -96,9 +96,9 @@ def __init__(self) -> None:
 
 def register_sagemaker_overrides():
     def set_handler(handler_type):
-        handler_registry.set_handler(
-            handler_type, _resolver.resolve_handler(handler_type)
-        )
+        handler = _resolver.resolve_handler(handler_type)
+        if handler:
+            handler_registry.set_handler(handler_type, handler)
 
     set_handler("invoke")
     set_handler("ping")
@@ -140,41 +140,54 @@ async def unload_adapter(data: SimpleNamespace, raw_request: Request):
     return Response(status_code=200)
 ```
 
-**3. `inject_adapter_id(adapter_path)`**
+**3. `inject_adapter_id(adapter_path, append=False, separator=None)`**
 
-Creates a decorator for injecting adapter IDs from headers into the request body. Takes a simple string path specifying where to inject the adapter ID:
+Creates a decorator for injecting adapter IDs from headers into the request body. Supports both replace and append modes:
 
 ```python
 from model_hosting_container_standards.sagemaker import inject_adapter_id
 
+# Replace mode (default)
 @inject_adapter_id("lora_id")
-async def inject_adapter_id(raw_request: Request):
+async def inject_adapter_replace(raw_request: Request):
     # The request body now contains the adapter ID from the header
     return Response(status_code=200)
+
+# Append mode
+@inject_adapter_id("model", append=True, separator=":")
+async def inject_adapter_append(raw_request: Request):
+    # Appends adapter ID to existing model field
+    return Response(status_code=200)
 ```
 
 **How `inject_adapter_id` works:**
-- Takes a single `adapter_path` string parameter specifying where to inject the adapter ID in the request body
+- Takes an `adapter_path` string parameter specifying where to inject the adapter ID in the request body
 - Supports both simple keys (e.g., `"model"`) and nested paths using dot notation (e.g., `"body.model.lora_name"`)
 - Automatically extracts the adapter ID from the SageMaker header `X-Amzn-SageMaker-Adapter-Identifier`
-- Raises `ValueError` if `adapter_path` is empty or if `adapter_path` is not a string
+- **Replace mode (default)**: Replaces the existing value at the target path
+- **Append mode**: Appends the adapter ID to existing value using a separator
+- Raises `ValueError` if `adapter_path` is empty or not a string, if `append=True` is used without `separator`, or if a non-empty `separator` is given without `append=True`
+
+**Injection Modes:**
 
 ```python
-# Simple path - injects at top level
+# Replace mode (default)
 @inject_adapter_id("model")
-# Results in: {"model": "<adapter_id>"}
 
-# Nested path - supports dot notation
-@inject_adapter_id("body.model.lora_name")
-# Results in: {"body": {"model": {"lora_name": "<adapter_id>"}}}
+# Append mode with colon separator
+@inject_adapter_id("model", append=True, separator=":")
+
+# Custom separators
+@inject_adapter_id("model", append=True, separator="-")  # Dash
+@inject_adapter_id("model", append=True, separator="")   # Direct concatenation
 ```
 
 ### Benefits of Convenience Functions
 
 1. **Shorter imports**: Import from `sagemaker` instead of `sagemaker.lora.factory`
 2. **Clearer intent**: Function names explicitly state what they do
 3. **Less boilerplate**: No need to import and reference `LoRAHandlerType`
-4. **Built-in validation**: `inject_adapter_id` validates and auto-fills the header mapping
+4. **Built-in validation**: `inject_adapter_id` validates parameters and auto-fills the header mapping
 5. **Future-proof**: If the implementation changes, your code doesn't need updates
 
 ### When to Use Direct Factory Access
@@ -320,20 +333,23 @@ This example shows how to extract adapter information from HTTP headers and inje
 from fastapi import Request, Response
 from model_hosting_container_standards.sagemaker import inject_adapter_id
 
-@inject_adapter_id(
-    request_shape={
-        "lora_id": None  # Value is automatically filled with the SageMaker header
-    }
-)
-async def inject_adapter_to_body(raw_request: Request):
-    """Inject adapter ID from header into request body for inference.
+# Replace mode example
+@inject_adapter_id("lora_id")
+async def inject_adapter_replace(raw_request: Request):
+    """Inject adapter ID from header into request body (replace mode).
 
-    This transformer modifies the request body in-place, adding the adapter ID
-    extracted from the X-Amzn-SageMaker-Adapter-Identifier header.
+    This transformer modifies the request body in-place, replacing the lora_id
+    field with the adapter ID from the X-Amzn-SageMaker-Adapter-Identifier header.
     """
     # The transformation has already modified raw_request._body
     # Just pass it through to the next handler
     return Response(status_code=200)
+
+# Append mode example
+@inject_adapter_id("model", append=True, separator=":")
+async def inject_adapter_append(raw_request: Request):
+    """Inject adapter ID using append mode."""
+    return Response(status_code=200)
 ```
 
 **SageMaker Request:**
@@ -488,7 +504,9 @@ bootstrap(app)
 
 1. **Use the Convenience Functions:** Always use `register_load_adapter_handler`, `register_unload_adapter_handler`, and `inject_adapter_id` from the `sagemaker` module instead of directly using `create_lora_transform_decorator`. They provide better error messages, validation, and automatic header handling.
 
-2. **Validate Adapter Sources:** Always validate that adapter sources are accessible and in the correct format (S3 paths, local paths, etc.).
+2. **Choose the Right Injection Mode:** Use `inject_adapter_id` replace mode (default) for most cases, but use append mode with appropriate separators for frameworks that expect concatenated model names.
+
+3. **Validate Adapter Sources:** Always validate that adapter sources are accessible and in the correct format (S3 paths, local paths, etc.).
 
 3. **Handle Adapter Loading Errors:** Wrap adapter loading in try-except blocks and return appropriate HTTP status codes:
    - 400 for invalid requests