From a7aa327946c41dc3690edc5ac36b7fa3fcfe0eb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 27 Jan 2026 10:15:45 +0100
Subject: [PATCH 1/5] feat(runtime): enhance runtime flag validation with
 backend allowlist and path safety checks

---
 pkg/inference/runtime_flags.go                |  29 +-
 pkg/inference/runtime_flags_allowlist.go      | 111 ++++++++
 pkg/inference/runtime_flags_allowlist_test.go | 202 +++++++++++++
 pkg/inference/runtime_flags_test.go           | 266 +++++++++++-------
 pkg/inference/scheduling/scheduler.go         |   4 +-
 5 files changed, 513 insertions(+), 99 deletions(-)
 create mode 100644 pkg/inference/runtime_flags_allowlist.go
 create mode 100644 pkg/inference/runtime_flags_allowlist_test.go

diff --git a/pkg/inference/runtime_flags.go b/pkg/inference/runtime_flags.go
index d0712c3e..dddb3e91 100644
--- a/pkg/inference/runtime_flags.go
+++ b/pkg/inference/runtime_flags.go
@@ -5,7 +5,32 @@ import (
 	"strings"
 )
 
-// ValidateRuntimeFlags ensures runtime flags don't contain paths (forward slash "/" or backslash "\")
+// ValidateRuntimeFlags validates runtime flags against the backend's allowlist
+// and checks for path characters as defense-in-depth.
+//
+// The allowlist is the primary defense - only explicitly permitted flags are accepted.
+// Path validation is secondary defense-in-depth against edge cases.
+// For unknown backends, it falls back to path-only validation (conservative).
+func ValidateRuntimeFlags(backendName string, flags []string) error {
+	// Get allowlist for this backend
+	allowedFlags := GetAllowedFlags(backendName)
+
+	// Check each flag against allowlist
+	for _, flag := range flags {
+		flagKey := ParseFlagKey(flag)
+		if flagKey == "" {
+			continue // Skip values, only validate flag keys
+		}
+		if !allowedFlags[flagKey] {
+			return fmt.Errorf("runtime flag %q is not allowed for backend %q", flagKey, backendName)
+		}
+	}
+
+	// still check for path characters in values
+	return validatePathSafety(flags)
+}
+
+// validatePathSafety ensures runtime flags don't contain paths (forward slash "/" or backslash "\")
 // to prevent malicious users from overwriting host files via arguments like
 // --log-file /some/path, --output-file /etc/passwd, or --log-file C:\Windows\file.
 //
@@ -17,7 +42,7 @@ import (
 // - UNC paths: \\network\share\file
 //
 // Returns an error if any flag contains a forward slash or backslash.
-func ValidateRuntimeFlags(flags []string) error {
+func validatePathSafety(flags []string) error {
 	for _, flag := range flags {
 		if strings.Contains(flag, "/") || strings.Contains(flag, "\\") {
 			return fmt.Errorf("invalid runtime flag %q: paths are not allowed (contains '/' or '\\\\')", flag)
diff --git a/pkg/inference/runtime_flags_allowlist.go b/pkg/inference/runtime_flags_allowlist.go
new file mode 100644
index 00000000..2702bebe
--- /dev/null
+++ b/pkg/inference/runtime_flags_allowlist.go
@@ -0,0 +1,111 @@
+package inference
+
+import "strings"
+
+// LlamaCppAllowedFlags contains safe flags for llama.cpp server
+var LlamaCppAllowedFlags = map[string]bool{
+	// Threading and performance
+	"-t": true, "--threads": true,
+	"-tb": true, "--threads-batch": true,
+
+	// Context and batching
+	"-c": true, "--ctx-size": true,
+	"-n": true, "--n-predict": true,
+	"-b": true, "--batch-size": true,
+	"-ub": true, "--ubatch-size": true,
+
+	// Sampling parameters
+	"--temp": true, "--temperature": true,
+	"--top-k": true, "--top-p": true, "--min-p": true,
+	"--repeat-last-n": true, "--repeat-penalty": true,
+	"--presence-penalty": true, "--frequency-penalty": true,
+	"--seed": true, "-s": true,
+
+	// GPU and memory
+	"-ngl": true, "--gpu-layers": true, "--n-gpu-layers": true,
+	"-sm": true, "--split-mode": true,
+	"-ts": true, "--tensor-split": true,
+	"-mg": true, "--main-gpu": true,
+	"--mlock": true, "--mmap": true, "--no-mmap": true,
+
+	// Server settings
+	"-np": true, "--parallel": true,
+	"--timeout": true, "-to": true,
+	"-cb": true, "--cont-batching": true,
+	"-fa": true, "--flash-attn": true,
+	"--cache-prompt": true,
+
+	// Mode flags (already handled but safe to allow)
+	"--embeddings": true, "--embedding": true,
+	"--reranking": true,
+	"--metrics":   true, "--no-metrics": true,
+	"--jinja": true,
+	"-v":      true, "--verbose": true,
+	"--reasoning-budget": true,
+
+	// RoPE scaling
+	"--rope-scaling": true, "--rope-scale": true,
+	"--rope-freq-base": true, "--rope-freq-scale": true,
+}
+
+// VLLMAllowedFlags contains safe flags for vLLM engine
+var VLLMAllowedFlags = map[string]bool{
+	// Parallelism
+	"--tensor-parallel-size": true, "-tp": true,
+	"--pipeline-parallel-size": true, "-pp": true,
+
+	// Model configuration
+	"--max-model-len":          true,
+	"--max-num-batched-tokens": true,
+	"--max-num-seqs":           true,
+	"--block-size":             true,
+	"--swap-space":             true,
+	"--seed":                   true,
+
+	// Data types and quantization
+	"--dtype":          true,
+	"--quantization":   true,
+	"-q":               true,
+	"--kv-cache-dtype": true,
+
+	// Performance flags
+	"--enforce-eager":             true,
+	"--enable-prefix-caching":     true,
+	"--enable-chunked-prefill":    true,
+	"--disable-custom-all-reduce": true,
+	"--use-v2-block-manager":      true,
+
+	// Tokenizer
+	"--tokenizer-mode":    true,
+	"--trust-remote-code": true,
+	"--max-logprobs":      true,
+
+	// Misc
+	"--revision":          true,
+	"--load-format":       true,
+	"--disable-log-stats": true,
+	"--served-model-name": true,
+}
+
+// AllowedFlags maps backend names to their allowed flag keys
+var AllowedFlags = map[string]map[string]bool{
+	"llama.cpp": LlamaCppAllowedFlags,
+	"vllm":      VLLMAllowedFlags,
+}
+
+// ParseFlagKey extracts the flag key from a flag string.
+// "--threads=4" -> "--threads", "-t" -> "-t", "4" -> ""
+func ParseFlagKey(flag string) string {
+	if !strings.HasPrefix(flag, "-") {
+		return "" // Not a flag, it's a value
+	}
+	if idx := strings.Index(flag, "="); idx != -1 {
+		return flag[:idx]
+	}
+	return flag
+}
+
+// GetAllowedFlags returns the allowlist for a backend, or nil if unknown
+func GetAllowedFlags(backendName string) map[string]bool {
+	return AllowedFlags[backendName]
+}
diff --git a/pkg/inference/runtime_flags_allowlist_test.go b/pkg/inference/runtime_flags_allowlist_test.go
new file mode 100644
index 00000000..2f9af9e2
--- /dev/null
+++ b/pkg/inference/runtime_flags_allowlist_test.go
@@ -0,0 +1,202 @@
+package inference
+
+import (
+	"testing"
+)
+
+func TestParseFlagKey(t *testing.T) {
+	tests := []struct {
+		name     string
+		flag     string
+		expected string
+	}{
+		{
+			name:     "long flag",
+			flag:     "--threads",
+			expected: "--threads",
+		},
+		{
+			name:     "short flag",
+			flag:     "-t",
+			expected: "-t",
+		},
+		{
+			name:     "long flag with equals",
+			flag:     "--threads=4",
+			expected: "--threads",
+		},
+		{
+			name:     "short flag with equals",
+			flag:     "-t=4",
+			expected: "-t",
+		},
+		{
+			name:     "value only (number)",
+			flag:     "4",
+			expected: "",
+		},
+		{
+			name:     "value only (string)",
+			flag:     "some-value",
+			expected: "",
+		},
+		{
+			name:     "empty string",
+			flag:     "",
+			expected: "",
+		},
+		{
+			name:     "long flag with complex value",
+			flag:     "--model-name=llama-3.2-1b",
+			expected: "--model-name",
+		},
+		{
+			name:     "flag with multiple equals",
+			flag:     "--config=key=value",
+			expected: "--config",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ParseFlagKey(tt.flag)
+			if result != tt.expected {
+				t.Errorf("ParseFlagKey(%q) = %q, want %q", tt.flag, result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestGetAllowedFlags(t *testing.T) {
+	tests := []struct {
+		name       string
+		backend    string
+		expectNil  bool
+		checkFlags []string // flags that should be in the allowlist
+	}{
+		{
+			name:       "llama.cpp backend",
+			backend:    "llama.cpp",
+			expectNil:  false,
+			checkFlags: []string{"--threads", "-t", "--ctx-size", "-ngl", "--verbose", "-v"},
+		},
+		{
+			name:       "vllm backend",
+			backend:    "vllm",
+			expectNil:  false,
+			checkFlags: []string{"--tensor-parallel-size", "-tp", "--max-model-len", "--dtype"},
+		},
+		{
+			name:      "unknown backend",
+			backend:   "unknown",
+			expectNil: true,
+		},
+		{
+			name:      "empty backend name",
+			backend:   "",
+			expectNil: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := GetAllowedFlags(tt.backend)
+
+			if tt.expectNil {
+				if result != nil {
+					t.Errorf("GetAllowedFlags(%q) expected nil, got %v", tt.backend, result)
+				}
+				return
+			}
+
+			if result == nil {
+				t.Fatalf("GetAllowedFlags(%q) returned nil, expected non-nil", tt.backend)
+			}
+
+			for _, flag := range tt.checkFlags {
+				if !result[flag] {
+					t.Errorf("GetAllowedFlags(%q) missing expected flag %q", tt.backend, flag)
+				}
+			}
+		})
+	}
+}
+
+func TestLlamaCppAllowedFlags(t *testing.T) {
+	expectedFlags := []string{
+		// Threading
+		"-t", "--threads", "-tb", "--threads-batch",
+		// Context
+		"-c", "--ctx-size", "-n", "--n-predict", "-b", "--batch-size", "-ub", "--ubatch-size",
+		// Sampling
+		"--temp", "--temperature", "--top-k", "--top-p", "--min-p",
+		"--repeat-last-n", "--repeat-penalty", "--presence-penalty", "--frequency-penalty",
+		"--seed", "-s",
+		// GPU
+		"-ngl", "--gpu-layers", "--n-gpu-layers", "-sm", "--split-mode",
+		"-ts", "--tensor-split", "-mg", "--main-gpu",
+		"--mlock", "--mmap", "--no-mmap",
+		// Server
+		"-np", "--parallel", "--timeout", "-to",
+		"-cb", "--cont-batching", "-fa", "--flash-attn", "--cache-prompt",
+		// Mode
+		"--embeddings", "--embedding", "--reranking",
+		"--metrics", "--no-metrics", "--jinja",
+		"-v", "--verbose", "--reasoning-budget",
+		// RoPE
+		"--rope-scaling", "--rope-scale", "--rope-freq-base", "--rope-freq-scale",
+	}
+
+	for _, flag := range expectedFlags {
+		if !LlamaCppAllowedFlags[flag] {
+			t.Errorf("LlamaCppAllowedFlags missing expected flag %q", flag)
+		}
+	}
+}
+
+func TestVLLMAllowedFlags(t *testing.T) {
+	expectedFlags := []string{
+		// Parallelism
+		"--tensor-parallel-size", "-tp", "--pipeline-parallel-size", "-pp",
+		// Model config
+		"--max-model-len", "--max-num-batched-tokens", "--max-num-seqs",
+		"--block-size", "--swap-space", "--seed",
+		// Data types
+		"--dtype", "--quantization", "-q", "--kv-cache-dtype",
+		// Performance
+		"--enforce-eager", "--enable-prefix-caching", "--enable-chunked-prefill",
+		"--disable-custom-all-reduce", "--use-v2-block-manager",
+		// Tokenizer
+		"--tokenizer-mode", "--trust-remote-code", "--max-logprobs",
+		// Misc
+		"--revision", "--load-format", "--disable-log-stats", "--served-model-name",
+	}
+
+	for _, flag := range expectedFlags {
+		if !VLLMAllowedFlags[flag] {
+			t.Errorf("VLLMAllowedFlags missing expected flag %q", flag)
+		}
+	}
+}
+
+func TestDangerousFlagsNotAllowed(t *testing.T) {
+	// Ensure dangerous flags are NOT in the allowlists
+	dangerousFlags := []string{
+		"--log-file",
+		"--output-file",
+		"--model-path",
+		"--config-file",
+		"--lora-path",
+		"--grammar-file",
+		"--prompt-file",
+	}
+
+	for _, flag := range dangerousFlags {
+		if LlamaCppAllowedFlags[flag] {
+			t.Errorf("Dangerous flag %q should not be in LlamaCppAllowedFlags", flag)
+		}
+		if VLLMAllowedFlags[flag] {
+			t.Errorf("Dangerous flag %q should not be in VLLMAllowedFlags", flag)
+		}
+	}
+}
diff --git a/pkg/inference/runtime_flags_test.go b/pkg/inference/runtime_flags_test.go
index 0f2543aa..0ec68c57 100644
--- a/pkg/inference/runtime_flags_test.go
+++ b/pkg/inference/runtime_flags_test.go
@@ -7,159 +7,170 @@ import (
 func TestValidateRuntimeFlags(t *testing.T) {
 	tests := []struct {
 		name        string
+		backend     string
 		flags       []string
 		expectError bool
 		description string
 	}{
+		// Tests for llama.cpp backend with allowlist
 		{
-			name:        "empty flags",
+			name:        "llama.cpp: empty flags",
+			backend:     "llama.cpp",
 			flags:       []string{},
 			expectError: false,
 			description: "Empty array should pass validation",
 		},
 		{
-			name:        "nil flags",
+			name:        "llama.cpp: nil flags",
+			backend:     "llama.cpp",
 			flags:       nil,
 			expectError: false,
 			description: "Nil array should pass validation",
 		},
 		{
-			name:        "valid flags without paths",
-			flags:       []string{"--verbose", "--debug", "--threads", "4"},
+			name:        "llama.cpp: valid allowed flags",
+			backend:     "llama.cpp",
+			flags:       []string{"--verbose", "--threads", "4"},
 			expectError: false,
-			description: "Simple flags without paths should pass",
+			description: "Allowed flags should pass",
 		},
 		{
-			name:        "valid single character flags",
-			flags:       []string{"-v", "-d", "-t", "4"},
+			name:        "llama.cpp: valid single character flags",
+			backend:     "llama.cpp",
+			flags:       []string{"-v", "-t", "4"},
 			expectError: false,
-			description: "Single character flags should pass",
+			description: "Single character allowed flags should pass",
 		},
 		{
-			name:        "valid flags with numbers and hyphens",
-			flags:       []string{"--gpu-memory-utilization", "0.9", "--max-tokens", "1024"},
+			name:        "llama.cpp: flag with equals format",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads=4", "--ctx-size=2048"},
 			expectError: false,
-			description: "Flags with hyphens and numeric values should pass",
-		},
-		{
-			name:        "reject absolute path in value",
-			flags:       []string{"--log-file", "/var/log/model.log"},
-			expectError: true,
-			description: "Absolute paths should be rejected",
-		},
-		{
-			name:        "reject absolute path in flag=value format",
-			flags:       []string{"--log-file=/var/log/model.log"},
-			expectError: true,
-			description: "Paths in flag=value format should be rejected",
+			description: "Flags with = format should pass",
 		},
 		{
-			name:        "reject relative path with parent directory",
-			flags:       []string{"--output", "../file.txt"},
+			name:        "llama.cpp: reject non-allowed flag",
+			backend:     "llama.cpp",
+			flags:       []string{"--log-file", "test.log"},
 			expectError: true,
-			description: "Relative paths with ../ should be rejected",
+			description: "Non-allowed flags should be rejected",
 		},
 		{
-			name:        "reject relative path with current directory",
-			flags:       []string{"--config", "./config.yaml"},
+			name:        "llama.cpp: reject path in allowed flag value",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "/etc/passwd"},
 			expectError: true,
-			description: "Relative paths with ./ should be rejected",
+			description: "Paths in values should be rejected",
 		},
 		{
-			name:        "reject Windows-style path with forward slash",
-			flags:       []string{"--file", "C:/Users/file.txt"},
+			name:        "llama.cpp: reject path in flag=value format",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads=/var/log/test"},
 			expectError: true,
-			description: "Windows-style paths with forward slash should be rejected",
+			description: "Paths in flag=value format should be rejected",
 		},
 		{
-			name:        "reject Windows-style path with backslash",
-			flags:       []string{"--file", "C:\\Users\\file.txt"},
-			expectError: true,
-			description: "Windows-style paths with backslash should be rejected",
+			name:        "llama.cpp: multiple allowed flags",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "4", "--ctx-size", "2048", "--verbose", "--flash-attn"},
+			expectError: false,
+			description: "Multiple allowed flags should pass",
 		},
 		{
-			name:        "reject Windows relative path with backslash",
-			flags:       []string{"--config", "..\\config.yaml"},
-			expectError: true,
-			description: "Windows relative paths with backslash should be rejected",
+			name:        "llama.cpp: GPU flags allowed",
+			backend:     "llama.cpp",
+			flags:       []string{"-ngl", "99", "--main-gpu", "0"},
+			expectError: false,
+			description: "GPU-related flags should be allowed",
 		},
 		{
-			name:        "reject Windows current directory path",
-			flags:       []string{"--output", ".\\output.txt"},
-			expectError: true,
-			description: "Windows current directory paths should be rejected",
+			name:        "llama.cpp: sampling flags allowed",
+			backend:     "llama.cpp",
+			flags:       []string{"--temp", "0.7", "--top-p", "0.9", "--seed", "42"},
+			expectError: false,
+			description: "Sampling flags should be allowed",
 		},
+
+		// Tests for vLLM backend with allowlist
 		{
-			name:        "reject UNC network path",
-			flags:       []string{"--share", "\\\\server\\share\\file.txt"},
-			expectError: true,
-			description: "UNC network paths should be rejected",
+			name:        "vllm: valid allowed flags",
+			backend:     "vllm",
+			flags:       []string{"--tensor-parallel-size", "2", "--max-model-len", "4096"},
+			expectError: false,
+			description: "Allowed vLLM flags should pass",
 		},
 		{
-			name:        "reject Windows system path",
-			flags:       []string{"--log", "C:\\Windows\\System32\\log.txt"},
+			name:        "vllm: reject non-allowed flag",
+			backend:     "vllm",
+			flags:       []string{"--output-file", "test.log"},
 			expectError: true,
-			description: "Windows system paths should be rejected",
+			description: "Non-allowed flags should be rejected for vLLM",
 		},
 		{
-			name:        "reject URL with http",
-			flags:       []string{"--endpoint", "http://example.com/api"},
-			expectError: true,
-			description: "URLs should be rejected (conservative approach)",
+			name:        "vllm: short flags allowed",
+			backend:     "vllm",
+			flags:       []string{"-tp", "2", "-q", "awq"},
+			expectError: false,
+			description: "Short vLLM flags should be allowed",
 		},
+
+		// Tests for unknown backend
 		{
-			name:        "reject URL with https",
-			flags:       []string{"--api-url", "https://api.example.com/v1"},
+			name:        "unknown backend: valid flags without paths",
+			backend:     "unknown-backend",
+			flags:       []string{"--verbose", "--debug", "--threads", "4"},
 			expectError: true,
-			description: "HTTPS URLs should be rejected (conservative approach)",
+			description: "Unknown backend should reject all flags (no allowlist)",
 		},
+
+		// Path safety tests (defense-in-depth)
 		{
-			name:        "reject path in middle of flag list",
-			flags:       []string{"--verbose", "--log-file", "/tmp/log.txt", "--debug"},
+			name:        "llama.cpp: reject relative path with parent directory",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "../file.txt"},
 			expectError: true,
-			description: "Path anywhere in flag list should be rejected",
+			description: "Relative paths with ../ should be rejected",
 		},
 		{
-			name:        "reject multiple paths",
-			flags:       []string{"--input", "/path/to/input", "--output", "/path/to/output"},
+			name:        "llama.cpp: reject relative path with current directory",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "./config.yaml"},
 			expectError: true,
-			description: "Multiple paths should be rejected",
+			description: "Relative paths with ./ should be rejected",
 		},
 		{
-			name:        "reject path traversal attempt",
-			flags:       []string{"--file", "../../etc/passwd"},
+			name:        "llama.cpp: reject Windows-style path with backslash",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "C:\\Users\\file.txt"},
 			expectError: true,
-			description: "Path traversal attempts should be rejected",
+			description: "Windows-style paths with backslash should be rejected",
 		},
 		{
-			name:        "reject root directory",
-			flags:       []string{"--root", "/"},
+			name:        "llama.cpp: reject UNC network path",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "\\\\server\\share\\file.txt"},
 			expectError: true,
-			description: "Root directory should be rejected",
+			description: "UNC network paths should be rejected",
 		},
 		{
-			name:        "reject home directory path",
-			flags:       []string{"--home", "/home/user/.config"},
+			name:        "llama.cpp: reject URL with http",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads", "http://example.com/api"},
 			expectError: true,
-			description: "Home directory paths should be rejected",
-		},
-		{
-			name:        "valid flag with special characters except slash",
-			flags:       []string{"--model-name", "llama-3.2-1b", "--temperature", "0.7"},
-			expectError: false,
-			description: "Flags with dots, hyphens, and numbers (no slash) should pass",
+			description: "URLs should be rejected (conservative approach)",
 		},
 		{
-			name:        "valid flag with underscore",
-			flags:       []string{"--max_tokens", "512", "--use_cache"},
+			name:        "llama.cpp: valid flag with special characters except slash",
+			backend:     "llama.cpp",
+			flags:       []string{"--temp", "0.7", "--seed", "42"},
 			expectError: false,
-			description: "Flags with underscores should pass",
+			description: "Flags with dots and numbers (no slash) should pass",
 		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := ValidateRuntimeFlags(tt.flags)
+			err := ValidateRuntimeFlags(tt.backend, tt.flags)
 
 			if tt.expectError {
 				if err == nil {
@@ -174,21 +185,86 @@ func TestValidateRuntimeFlags(t *testing.T) {
 	}
 }
 
-func TestValidateRuntimeFlags_ErrorMessage(t *testing.T) {
-	// Test that error messages are helpful
-	flags := []string{"--log-file", "/var/log/test.log"}
-	err := ValidateRuntimeFlags(flags)
+func TestValidateRuntimeFlags_ErrorMessages(t *testing.T) {
+	// Test that allowlist error messages are helpful
+	t.Run("allowlist rejection message", func(t *testing.T) {
+		err := ValidateRuntimeFlags("llama.cpp", []string{"--log-file", "test.log"})
+		if err == nil {
+			t.Fatal("Expected error but got none")
+		}
 
-	if err == nil {
-		t.Fatal("Expected error but got none")
-	}
+		errMsg := err.Error()
+		if !contains(errMsg, "--log-file") {
+			t.Errorf("Error message should contain the offending flag, got: %s", errMsg)
+		}
+		if !contains(errMsg, "not allowed") {
+			t.Errorf("Error message should explain rejection, got: %s", errMsg)
+		}
+		if !contains(errMsg, "llama.cpp") {
+			t.Errorf("Error message should mention the backend, got: %s", errMsg)
+		}
+	})
+
+	// Test that path safety error messages are helpful
+	t.Run("path rejection message", func(t *testing.T) {
+		err := ValidateRuntimeFlags("llama.cpp", []string{"--threads", "/var/log/test.log"})
+		if err == nil {
+			t.Fatal("Expected error but got none")
+		}
+
+		errMsg := err.Error()
+		if !contains(errMsg, "/var/log/test.log") {
+			t.Errorf("Error message should contain the offending value, got: %s", errMsg)
+		}
+		if !contains(errMsg, "paths are not allowed") {
+			t.Errorf("Error message should explain why it failed, got: %s", errMsg)
+		}
+	})
+}
 
-	errMsg := err.Error()
-	if !contains(errMsg, "/var/log/test.log") {
-		t.Errorf("Error message should contain the offending flag value, got: %s", errMsg)
+func TestValidatePathSafety(t *testing.T) {
+	tests := []struct {
+		name        string
+		flags       []string
+		expectError bool
+	}{
+		{
+			name:        "no paths",
+			flags:       []string{"--verbose", "--threads", "4"},
+			expectError: false,
+		},
+		{
+			name:        "forward slash",
+			flags:       []string{"--file", "/etc/passwd"},
+			expectError: true,
+		},
+		{
+			name:        "backslash",
+			flags:       []string{"--file", "C:\\Windows\\file"},
+			expectError: true,
+		},
+		{
+			name:        "relative path forward",
+			flags:       []string{"../file"},
+			expectError: true,
+		},
+		{
+			name:        "relative path backward",
+			flags:       []string{"..\\file"},
+			expectError: true,
+		},
 	}
-	if !contains(errMsg, "paths are not allowed") {
-		t.Errorf("Error message should explain why it failed, got: %s", errMsg)
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := validatePathSafety(tt.flags)
+			if tt.expectError && err == nil {
+				t.Error("expected error but got none")
+			}
+			if !tt.expectError && err != nil {
+				t.Errorf("unexpected error: %v", err)
+			}
+		})
 	}
 }
 
diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go
index 631b8218..33db3834 100644
--- a/pkg/inference/scheduling/scheduler.go
+++ b/pkg/inference/scheduling/scheduler.go
@@ -246,8 +246,8 @@ func (s *Scheduler) ConfigureRunner(ctx context.Context, backend inference.Backe
 		}
 	}
 
-	// Validate runtime flags to prevent path-based security issues
-	if err := inference.ValidateRuntimeFlags(runtimeFlags); err != nil {
+	// Validate runtime flags against backend allowlist and path safety
+	if err := inference.ValidateRuntimeFlags(backend.Name(), runtimeFlags); err != nil {
 		return nil, err
 	}
 

From b64f0d9b2278cae43b23c761d2bfd6e287ec8efc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 27 Jan 2026 10:19:48 +0100
Subject: [PATCH 2/5] feat(runtime): add KV cache flags to runtime flags
 allowlist and update tests

---
 pkg/inference/runtime_flags_allowlist.go      |  3 +++
 pkg/inference/runtime_flags_allowlist_test.go |  2 ++
 pkg/inference/runtime_flags_test.go           | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/pkg/inference/runtime_flags_allowlist.go b/pkg/inference/runtime_flags_allowlist.go
index 2702bebe..b207ec00 100644
--- a/pkg/inference/runtime_flags_allowlist.go
+++ b/pkg/inference/runtime_flags_allowlist.go
@@ -35,6 +35,9 @@ var LlamaCppAllowedFlags = map[string]bool{
 	"-fa": true, "--flash-attn": true,
 	"--cache-prompt": true,
 
+	// KV cache quantization
+	"--cache-type-k": true, "--cache-type-v": true,
+
 	// Mode flags (already handled but safe to allow)
 	"--embeddings": true, "--embedding": true,
 	"--reranking": true,
diff --git a/pkg/inference/runtime_flags_allowlist_test.go b/pkg/inference/runtime_flags_allowlist_test.go
index 2f9af9e2..a9ffd1cc 100644
--- a/pkg/inference/runtime_flags_allowlist_test.go
+++ b/pkg/inference/runtime_flags_allowlist_test.go
@@ -139,6 +139,8 @@ func TestLlamaCppAllowedFlags(t *testing.T) {
 		// Server
 		"-np", "--parallel", "--timeout", "-to",
 		"-cb", "--cont-batching", "-fa", "--flash-attn", "--cache-prompt",
+		// KV cache
+		"--cache-type-k", "--cache-type-v",
 		// Mode
 		"--embeddings", "--embedding", "--reranking",
 		"--metrics", "--no-metrics", "--jinja",
diff --git a/pkg/inference/runtime_flags_test.go b/pkg/inference/runtime_flags_test.go
index 0ec68c57..7439cbd8 100644
--- a/pkg/inference/runtime_flags_test.go
+++ b/pkg/inference/runtime_flags_test.go
@@ -90,6 +90,25 @@ func TestValidateRuntimeFlags(t *testing.T) {
 			expectError: false,
 			description: "Sampling flags should be allowed",
 		},
+		{
+			name:    "llama.cpp: real-world flags from issue 515",
+			backend: "llama.cpp",
+			flags: []string{
+				"--n-gpu-layers", "99",
+				"--jinja",
+				"--top-p", "0.8",
+				"--top-k", "20",
+				"--temp", "0.7",
+				"--min-p", "0.0",
+				"--presence-penalty", "1.5",
+				"--no-mmap",
+				"--flash-attn",
+				"--cache-type-k", "q8_0",
+				"--cache-type-v", "q8_0",
+			},
+			expectError: false,
+			description: "Real-world flags from GitHub issue 515 should be allowed",
+		},
 
 		// Tests for vLLM backend with allowlist
 		{

From d4db5544a0c1ae3c874258e206ad5caf95316fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 27 Jan 2026 10:34:00 +0100
Subject: [PATCH 3/5] feat(runtime): expand allowed flags for llama.cpp and
 vLLM backends with additional categories and safety checks

---
 pkg/inference/runtime_flags.go                |   4 -
 pkg/inference/runtime_flags_allowlist.go      | 186 +++++++++++++++---
 pkg/inference/runtime_flags_allowlist_test.go | 167 +++++++++++-----
 3 files changed, 269 insertions(+), 88 deletions(-)

diff --git a/pkg/inference/runtime_flags.go b/pkg/inference/runtime_flags.go
index dddb3e91..33120fc3 100644
--- a/pkg/inference/runtime_flags.go
+++ b/pkg/inference/runtime_flags.go
@@ -7,10 +7,6 @@ import (
 
 // ValidateRuntimeFlags validates runtime flags against the backend's allowlist
 // and checks for path characters as defense-in-depth.
-//
-// The allowlist is the primary defense - only explicitly permitted flags are accepted.
-// Path validation is secondary defense-in-depth against edge cases.
-// For unknown backends, it falls back to path-only validation (conservative).
 func ValidateRuntimeFlags(backendName string, flags []string) error {
 	// Get allowlist for this backend
 	allowedFlags := GetAllowedFlags(backendName)
diff --git a/pkg/inference/runtime_flags_allowlist.go b/pkg/inference/runtime_flags_allowlist.go
index b207ec00..fe9fa43e 100644
--- a/pkg/inference/runtime_flags_allowlist.go
+++ b/pkg/inference/runtime_flags_allowlist.go
@@ -2,56 +2,175 @@ package inference
 
 import "strings"
 
-// LlamaCppAllowedFlags contains safe flags for llama.cpp server
+// LlamaCppAllowedFlags contains safe flags for llama.cpp server.
+// This list is based on llama.cpp server documentation.
+// Flags involving file paths are intentionally excluded for security.
 var LlamaCppAllowedFlags = map[string]bool{
-	// Threading and performance
+	// Threading and CPU control
 	"-t": true, "--threads": true,
 	"-tb": true, "--threads-batch": true,
-
-	// Context and batching
+	"-C": true, "--cpu-mask": true,
+	"-Cr": true, "--cpu-range": true,
+	"--cpu-strict": true,
+	"--prio":       true,
+	"--poll":       true,
+	"-Cb":          true, "--cpu-mask-batch": true,
+	"-Crb": true, "--cpu-range-batch": true,
+	"--cpu-strict-batch": true,
+	"--prio-batch":       true,
+	"--poll-batch":       true,
+
+	// Context and prediction
 	"-c": true, "--ctx-size": true,
-	"-n": true, "--n-predict": true,
+	"-n": true, "--predict": true, "--n-predict": true,
+	"--keep": true,
+
+	// Batching and performance
 	"-b": true, "--batch-size": true,
 	"-ub": true, "--ubatch-size": true,
+	"--swa-full": true,
+	"-fa":        true, "--flash-attn": true,
+	"--perf": true, "--no-perf": true,
 
 	// Sampling parameters
+	"--samplers": true,
+	"-s":         true, "--seed": true,
 	"--temp": true, "--temperature": true,
-	"--top-k": true, "--top-p": true, "--min-p": true,
-	"--repeat-last-n": true, "--repeat-penalty": true,
-	"--presence-penalty": true, "--frequency-penalty": true,
-	"--seed": true, "-s": true,
-
-	// GPU and memory
+	"--top-k":              true,
+	"--top-p":              true,
+	"--min-p":              true,
+	"--top-nsigma":         true,
+	"--xtc-probability":    true,
+	"--xtc-threshold":      true,
+	"--typical":            true,
+	"--repeat-last-n":      true,
+	"--repeat-penalty":     true,
+	"--presence-penalty":   true,
+	"--frequency-penalty":  true,
+	"--dry-multiplier":     true,
+	"--dry-base":           true,
+	"--dry-allowed-length": true,
+	"--dry-penalty-last-n": true,
+	"--mirostat":           true,
+	"--mirostat-lr":        true,
+	"--mirostat-ent":       true,
+	"--ignore-eos":         true,
+	"--dynatemp-range":     true,
+	"--dynatemp-exp":       true,
+
+	// GPU and device management
+	"-dev": true, "--device": true,
 	"-ngl": true, "--gpu-layers": true, "--n-gpu-layers": true,
 	"-sm": true, "--split-mode": true,
 	"-ts": true, "--tensor-split": true,
 	"-mg": true, "--main-gpu": true,
-	"--mlock": true, "--mmap": true, "--no-mmap": true,
+	"-fit": true, "--fit": true,
+	"-fitt": true, "--fit-target": true,
+	"-fitc": true, "--fit-ctx": true,
+
+	// Memory and caching
+	"-kvo": true, "--kv-offload": true,
+	"-nkvo": true, "--no-kv-offload": true,
+	"--repack": true, "-nr": true, "--no-repack": true,
+	"--no-host": true,
+	"-ctk":      true, "--cache-type-k": true,
+	"-ctv": true, "--cache-type-v": true,
+	"--mlock": true,
+	"--mmap":  true, "--no-mmap": true,
+	"-dio": true, "--direct-io": true,
+	"-ndio": true, "--no-direct-io": true,
+	"-cram": true, "--cache-ram": true,
+	"-kvu": true, "--kv-unified": true,
+	"--context-shift": true, "--no-context-shift": true,
 
-	// Server settings
+	// RoPE scaling
+	"--rope-scaling":     true,
+	"--rope-scale":       true,
+	"--rope-freq-base":   true,
+	"--rope-freq-scale":  true,
+	"--yarn-orig-ctx":    true,
+	"--yarn-ext-factor":  true,
+	"--yarn-attn-factor": true,
+	"--yarn-beta-slow":   true,
+	"--yarn-beta-fast":   true,
+
+	// Server configuration
 	"-np": true, "--parallel": true,
-	"--timeout": true, "-to": true,
 	"-cb": true, "--cont-batching": true,
-	"-fa": true, "--flash-attn": true,
-	"--cache-prompt": true,
-
-	// KV cache quantization
-	"--cache-type-k": true, "--cache-type-v": true,
-
-	// Mode flags (already handled but safe to allow)
-	"--embeddings": true, "--embedding": true,
-	"--reranking": true,
-	"--metrics":   true, "--no-metrics": true,
-	"--jinja": true,
-	"-v":      true, "--verbose": true,
-	"--reasoning-budget": true,
-
-	// RoPE scaling
-	"--rope-scaling": true, "--rope-scale": true,
-	"--rope-freq-base": true, "--rope-freq-scale": true,
+	"-nocb": true, "--no-cont-batching": true,
+	"--warmup": true, "--no-warmup": true,
+	"-to": true, "--timeout": true,
+	"--threads-http":       true,
+	"--cache-prompt":       true,
+	"--no-cache-prompt":    true,
+	"--cache-reuse":        true,
+	"--sleep-idle-seconds": true,
+
+	// Multimodal (safe flags only - no file paths)
+	"--mmproj-auto": true, "--no-mmproj": true, "--no-mmproj-auto": true,
+	"--mmproj-offload": true, "--no-mmproj-offload": true,
+	"--image-min-tokens": true,
+	"--image-max-tokens": true,
+	"--spm-infill":       true,
+
+	// Speculative decoding (safe flags only - no file paths)
+	"--draft": true, "--draft-n": true, "--draft-max": true,
+	"--draft-min": true, "--draft-n-min": true,
+	"--draft-p-min": true,
+	"-cd":           true, "--ctx-size-draft": true,
+	"-devd": true, "--device-draft": true,
+	"-ngld": true, "--gpu-layers-draft": true, "--n-gpu-layers-draft": true,
+	"-td": true, "--threads-draft": true,
+	"-tbd": true, "--threads-batch-draft": true,
+
+	// LoRA (safe flags only - no file paths)
+	"--lora-init-without-apply": true,
+
+	// Control vectors (safe flags only - no file paths)
+	"--control-vector-layer-range": true,
+
+	// Grammar and constraints (safe flags only - no file paths)
+	"--grammar": true,
+	"-j":        true, "--json-schema": true,
+	"-bs": true, "--backend-sampling": true,
+
+	// Template and format control (safe flags only - no file paths)
+	"--chat-template":        true,
+	"--chat-template-kwargs": true,
+	"--jinja":                true, "--no-jinja": true,
+	"--pooling":              true,
+	"--reasoning-format":     true,
+	"--reasoning-budget":     true,
+	"--prefill-assistant":    true,
+	"--no-prefill-assistant": true,
+
+	// Web interface and API (safe flags only - no file paths)
+	"--api-prefix": true,
+	"--webui":      true, "--no-webui": true,
+	"--webui-config": true,
+	"--api-key":      true,
+	"--metrics":      true,
+	"--no-metrics":   true,
+	"--props":        true,
+	"--slots":        true, "--no-slots": true,
+
+	// Embedding and specialized
+	"--embedding": true, "--embeddings": true,
+	"--rerank": true, "--reranking": true,
+	"-sps": true, "--slot-prompt-similarity": true,
+
+	// Tensor and computation (safe flags only)
+	"-cmoe": true, "--cpu-moe": true,
+	"-ncmoe": true, "--n-cpu-moe": true,
+	"--check-tensors": true,
+	"--op-offload":    true, "--no-op-offload": true,
+
+	// Verbose/debug
+	"-v": true, "--verbose": true,
 }
 
-// VLLMAllowedFlags contains safe flags for vLLM engine
+// VLLMAllowedFlags contains safe flags for vLLM engine.
+// Flags involving file paths are intentionally excluded for security.
 var VLLMAllowedFlags = map[string]bool{
 	// Parallelism
 	"--tensor-parallel-size": true, "-tp": true,
@@ -88,6 +207,9 @@ var VLLMAllowedFlags = map[string]bool{
 	"--load-format":       true,
 	"--disable-log-stats": true,
 	"--served-model-name": true,
+
+	// GPU memory
+	"--gpu-memory-utilization": true,
 }
 
 // AllowedFlags maps backend names to their allowed flag keys
diff --git a/pkg/inference/runtime_flags_allowlist_test.go b/pkg/inference/runtime_flags_allowlist_test.go
index a9ffd1cc..d32ece67 100644
--- a/pkg/inference/runtime_flags_allowlist_test.go
+++ b/pkg/inference/runtime_flags_allowlist_test.go
@@ -78,13 +78,13 @@ func TestGetAllowedFlags(t *testing.T) {
 			name:       "llama.cpp backend",
 			backend:    "llama.cpp",
 			expectNil:  false,
-			checkFlags: []string{"--threads", "-t", "--ctx-size", "-ngl", "--verbose", "-v"},
+			checkFlags: []string{"--threads", "-t", "--ctx-size", "-ngl", "--verbose", "-v", "--cache-type-k", "--cache-type-v"},
 		},
 		{
 			name:       "vllm backend",
 			backend:    "vllm",
 			expectNil:  false,
-			checkFlags: []string{"--tensor-parallel-size", "-tp", "--max-model-len", "--dtype"},
+			checkFlags: []string{"--tensor-parallel-size", "-tp", "--max-model-len", "--dtype", "--gpu-memory-utilization"},
 		},
 		{
 			name:      "unknown backend",
@@ -122,68 +122,88 @@ func TestGetAllowedFlags(t *testing.T) {
 	}
 }
 
-func TestLlamaCppAllowedFlags(t *testing.T) {
-	expectedFlags := []string{
-		// Threading
-		"-t", "--threads", "-tb", "--threads-batch",
-		// Context
-		"-c", "--ctx-size", "-n", "--n-predict", "-b", "--batch-size", "-ub", "--ubatch-size",
-		// Sampling
-		"--temp", "--temperature", "--top-k", "--top-p", "--min-p",
-		"--repeat-last-n", "--repeat-penalty", "--presence-penalty", "--frequency-penalty",
-		"--seed", "-s",
-		// GPU
-		"-ngl", "--gpu-layers", "--n-gpu-layers", "-sm", "--split-mode",
-		"-ts", "--tensor-split", "-mg", "--main-gpu",
-		"--mlock", "--mmap", "--no-mmap",
-		// Server
-		"-np", "--parallel", "--timeout", "-to",
-		"-cb", "--cont-batching", "-fa", "--flash-attn", "--cache-prompt",
-		// KV cache
-		"--cache-type-k", "--cache-type-v",
-		// Mode
-		"--embeddings", "--embedding", "--reranking",
-		"--metrics", "--no-metrics", "--jinja",
-		"-v", "--verbose", "--reasoning-budget",
-		// RoPE
-		"--rope-scaling", "--rope-scale", "--rope-freq-base", "--rope-freq-scale",
+func TestLlamaCppAllowedFlags_Categories(t *testing.T) {
+	// Test that key flags from each category are present
+	categories := map[string][]string{
+		"threading": {"-t", "--threads", "-tb", "--threads-batch", "-C", "--cpu-mask", "--prio"},
+		"context":   {"-c", "--ctx-size", "-n", "--n-predict", "--keep"},
+		"batching":  {"-b", "--batch-size", "-ub", "--ubatch-size", "-fa", "--flash-attn"},
+		"sampling": {
+			"--samplers", "-s", "--seed", "--temp", "--temperature",
+			"--top-k", "--top-p", "--min-p", "--typical",
+			"--repeat-last-n", "--repeat-penalty",
+			"--presence-penalty", "--frequency-penalty",
+			"--mirostat", "--mirostat-lr", "--mirostat-ent",
+			"--dynatemp-range", "--dynatemp-exp",
+		},
+		"gpu": {
+			"-ngl", "--gpu-layers", "--n-gpu-layers",
+			"-sm", "--split-mode", "-ts", "--tensor-split",
+			"-mg", "--main-gpu", "-dev", "--device",
+		},
+		"memory": {
+			"--mlock", "--mmap", "--no-mmap",
+			"-ctk", "--cache-type-k", "-ctv", "--cache-type-v",
+			"-kvo", "--kv-offload", "-nkvo", "--no-kv-offload",
+			"-cram", "--cache-ram",
+		},
+		"rope": {
+			"--rope-scaling", "--rope-scale",
+			"--rope-freq-base", "--rope-freq-scale",
+			"--yarn-orig-ctx", "--yarn-ext-factor",
+		},
+		"server": {
+			"-np", "--parallel", "-to", "--timeout",
+			"-cb", "--cont-batching", "--cache-prompt",
+			"--threads-http", "--warmup", "--no-warmup",
+		},
+		"mode": {
+			"--embeddings", "--embedding", "--reranking", "--rerank",
+			"--metrics", "--no-metrics", "--jinja", "--no-jinja",
+		},
+		"speculative": {
+			"--draft", "--draft-max", "--draft-min",
+			"-cd", "--ctx-size-draft",
+			"-ngld", "--gpu-layers-draft",
+		},
 	}
 
-	for _, flag := range expectedFlags {
-		if !LlamaCppAllowedFlags[flag] {
-			t.Errorf("LlamaCppAllowedFlags missing expected flag %q", flag)
-		}
+	for category, flags := range categories {
+		t.Run(category, func(t *testing.T) {
+			for _, flag := range flags {
+				if !LlamaCppAllowedFlags[flag] {
+					t.Errorf("LlamaCppAllowedFlags missing %s flag %q", category, flag)
+				}
+			}
+		})
 	}
 }
 
-func TestVLLMAllowedFlags(t *testing.T) {
-	expectedFlags := []string{
-		// Parallelism
-		"--tensor-parallel-size", "-tp", "--pipeline-parallel-size", "-pp",
-		// Model config
-		"--max-model-len", "--max-num-batched-tokens", "--max-num-seqs",
-		"--block-size", "--swap-space", "--seed",
-		// Data types
-		"--dtype", "--quantization", "-q", "--kv-cache-dtype",
-		// Performance
-		"--enforce-eager", "--enable-prefix-caching", "--enable-chunked-prefill",
-		"--disable-custom-all-reduce", "--use-v2-block-manager",
-		// Tokenizer
-		"--tokenizer-mode", "--trust-remote-code", "--max-logprobs",
-		// Misc
-		"--revision", "--load-format", "--disable-log-stats", "--served-model-name",
+func TestVLLMAllowedFlags_Categories(t *testing.T) {
+	categories := map[string][]string{
+		"parallelism": {"--tensor-parallel-size", "-tp", "--pipeline-parallel-size", "-pp"},
+		"model":       {"--max-model-len", "--max-num-batched-tokens", "--max-num-seqs", "--block-size", "--swap-space", "--seed"},
+		"dtype":       {"--dtype", "--quantization", "-q", "--kv-cache-dtype"},
+		"performance": {"--enforce-eager", "--enable-prefix-caching", "--enable-chunked-prefill"},
+		"tokenizer":   {"--tokenizer-mode", "--trust-remote-code", "--max-logprobs"},
+		"misc":        {"--revision", "--load-format", "--disable-log-stats", "--served-model-name", "--gpu-memory-utilization"},
 	}
 
-	for _, flag := range expectedFlags {
-		if !VLLMAllowedFlags[flag] {
-			t.Errorf("VLLMAllowedFlags missing expected flag %q", flag)
-		}
+	for category, flags := range categories {
+		t.Run(category, func(t *testing.T) {
+			for _, flag := range flags {
+				if !VLLMAllowedFlags[flag] {
+					t.Errorf("VLLMAllowedFlags missing %s flag %q", category, flag)
+				}
+			}
+		})
 	}
 }
 
 func TestDangerousFlagsNotAllowed(t *testing.T) {
-	// Ensure dangerous flags are NOT in the allowlists
+	// Ensure dangerous flags involving file paths are NOT in the allowlists
 	dangerousFlags := []string{
+		// File path flags
 		"--log-file",
 		"--output-file",
 		"--model-path",
@@ -191,6 +211,24 @@ func TestDangerousFlagsNotAllowed(t *testing.T) {
 		"--lora-path",
 		"--grammar-file",
 		"--prompt-file",
+		// llama.cpp specific path flags
+		"--slot-save-path",
+		"-mm", "--mmproj",
+		"-mmu", "--mmproj-url",
+		"-jf", "--json-schema-file",
+		"--chat-template-file",
+		"--path",
+		"--webui-config-file",
+		"--api-key-file",
+		"--ssl-key-file",
+		"--ssl-cert-file",
+		"--models-dir",
+		"--models-preset",
+		"-md", "--model-draft",
+		"--lora",
+		"--lora-scaled",
+		"--control-vector",
+		"--control-vector-scaled",
 	}
 
 	for _, flag := range dangerousFlags {
@@ -202,3 +240,28 @@ func TestDangerousFlagsNotAllowed(t *testing.T) {
 		}
 	}
 }
+
+func TestIssue515Flags(t *testing.T) {
+	// Verify all flags from GitHub issue #515 are allowed
+	issue515Flags := []string{
+		"--n-gpu-layers",
+		"--no-mmap",
+		"--flash-attn",
+		"--jinja",
+		"--top-p",
+		"--top-k",
+		"--temp",
+		"--min-p",
+		"--presence-penalty",
+		"--cache-type-k",
+		"--cache-type-v",
+		"--n-predict",
+		"--threads",
+	}
+
+	for _, flag := range issue515Flags {
+		if !LlamaCppAllowedFlags[flag] {
+			t.Errorf("Flag %q from issue #515 should be in LlamaCppAllowedFlags", flag)
+		}
+	}
+}

From 017519e353dce9c117a4037795008a302ddaff0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 27 Jan 2026 11:05:32 +0100
Subject: [PATCH 4/5] test(runtime): replace custom string contains function
 with strings.Contains for error message validation

---
 pkg/inference/runtime_flags_test.go | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/pkg/inference/runtime_flags_test.go b/pkg/inference/runtime_flags_test.go
index 7439cbd8..349878d3 100644
--- a/pkg/inference/runtime_flags_test.go
+++ b/pkg/inference/runtime_flags_test.go
@@ -1,6 +1,7 @@
 package inference
 
 import (
+	"strings"
 	"testing"
 )
 
@@ -213,13 +214,13 @@ func TestValidateRuntimeFlags_ErrorMessages(t *testing.T) {
 		}
 
 		errMsg := err.Error()
-		if !contains(errMsg, "--log-file") {
+		if !strings.Contains(errMsg, "--log-file") {
 			t.Errorf("Error message should contain the offending flag, got: %s", errMsg)
 		}
-		if !contains(errMsg, "not allowed") {
+		if !strings.Contains(errMsg, "not allowed") {
 			t.Errorf("Error message should explain rejection, got: %s", errMsg)
 		}
-		if !contains(errMsg, "llama.cpp") {
+		if !strings.Contains(errMsg, "llama.cpp") {
 			t.Errorf("Error message should mention the backend, got: %s", errMsg)
 		}
 	})
@@ -232,10 +233,10 @@ func TestValidateRuntimeFlags_ErrorMessages(t *testing.T) {
 		}
 
 		errMsg := err.Error()
-		if !contains(errMsg, "/var/log/test.log") {
+		if !strings.Contains(errMsg, "/var/log/test.log") {
 			t.Errorf("Error message should contain the offending value, got: %s", errMsg)
 		}
-		if !contains(errMsg, "paths are not allowed") {
+		if !strings.Contains(errMsg, "paths are not allowed") {
 			t.Errorf("Error message should explain why it failed, got: %s", errMsg)
 		}
 	})
@@ -286,19 +287,3 @@ func TestValidatePathSafety(t *testing.T) {
 		})
 	}
 }
-
-// contains is a helper function to check if a string contains a substring
-func contains(s, substr string) bool {
-	return len(s) >= len(substr) && (s == substr || substr == "" ||
-		(s != "" && indexOf(s, substr) >= 0))
-}
-
-// indexOf returns the index of substr in s, or -1 if not found
-func indexOf(s, substr string) int {
-	for i := 0; i <= len(s)-len(substr); i++ {
-		if s[i:i+len(substr)] == substr {
-			return i
-		}
-	}
-	return -1
-}

From 548d6041ea9ebefad3bf32e43f017192b11fd77a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= <ignasi.lopez.luna@gmail.com>
Date: Tue, 27 Jan 2026 15:51:08 +0100
Subject: [PATCH 5/5] feat(runtime): add flag injection validation to prevent
 disallowed flags in values

---
 pkg/inference/runtime_flags.go      | 29 ++++++++++++++++
 pkg/inference/runtime_flags_test.go | 51 +++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/pkg/inference/runtime_flags.go b/pkg/inference/runtime_flags.go
index 33120fc3..bb9e1f6b 100644
--- a/pkg/inference/runtime_flags.go
+++ b/pkg/inference/runtime_flags.go
@@ -22,6 +22,11 @@ func ValidateRuntimeFlags(backendName string, flags []string) error {
 		}
 	}
 
+	// Check for flags in values (e.g., --seed=--log-file=foo or --seed=-l)
+	if err := validateNoFlagInjection(flags); err != nil {
+		return err
+	}
+
 	// still check for path characters in values
 	return validatePathSafety(flags)
 }
@@ -46,3 +51,27 @@ func validatePathSafety(flags []string) error {
 	}
 	return nil
 }
+
+// validateNoFlagInjection checks for flags in values when using the = format.
+// This prevents attacks like --seed=--log-file=foo or --seed=-l where disallowed flags
+// are embedded as values.
+// Values starting with - are only allowed if followed by a digit (negative numbers like -1, -0.5).
+func validateNoFlagInjection(flags []string) error {
+	for _, flag := range flags {
+		if idx := strings.Index(flag, "="); idx != -1 {
+			value := flag[idx+1:]
+			if strings.HasPrefix(value, "-") {
+				// Allow negative numbers (-1, -0.5) but reject flags (-l, --flag)
+				if len(value) < 2 || !isDigit(value[1]) {
+					return fmt.Errorf("invalid flag %q: value cannot start with '-' unless followed by a digit", flag)
+				}
+			}
+		}
+	}
+	return nil
+}
+
+// isDigit returns true if the byte is an ASCII digit (0-9)
+func isDigit(b byte) bool {
+	return b >= '0' && b <= '9'
+}
diff --git a/pkg/inference/runtime_flags_test.go b/pkg/inference/runtime_flags_test.go
index 349878d3..78fb84a3 100644
--- a/pkg/inference/runtime_flags_test.go
+++ b/pkg/inference/runtime_flags_test.go
@@ -186,6 +186,57 @@ func TestValidateRuntimeFlags(t *testing.T) {
 			expectError: false,
 			description: "Flags with dots and numbers (no slash) should pass",
 		},
+
+		// Flag injection tests (smuggling flags via = separator)
+		{
+			name:        "llama.cpp: reject long flag injection via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--seed=--log-file=container-to-host.log"},
+			expectError: true,
+			description: "Smuggled long flags via = separator should be rejected",
+		},
+		{
+			name:        "llama.cpp: reject short flag injection via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--seed=-l"},
+			expectError: true,
+			description: "Smuggled short flags via = separator should be rejected",
+		},
+		{
+			name:        "llama.cpp: reject dash-only value via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--seed=-"},
+			expectError: true,
+			description: "Single dash as value should be rejected",
+		},
+		{
+			name:        "llama.cpp: reject dash-dot value via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--temp=-.5"},
+			expectError: true,
+			description: "Dash followed by non-digit should be rejected",
+		},
+		{
+			name:        "llama.cpp: allow negative integer via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--threads=-1"},
+			expectError: false,
+			description: "Negative integer values should be allowed",
+		},
+		{
+			name:        "llama.cpp: allow negative float via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--temp=-0.5"},
+			expectError: false,
+			description: "Negative float values should be allowed",
+		},
+		{
+			name:        "llama.cpp: allow zero via equals",
+			backend:     "llama.cpp",
+			flags:       []string{"--seed=0"},
+			expectError: false,
+			description: "Zero value should be allowed",
+		},
 	}
 
 	for _, tt := range tests {