diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..2b1cd6ba4
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,49 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.venv
+.venv_temp
+venv/
+ENV/
+env/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Git
+.git/
+.gitignore
+
+# Dataset cache (not baked into the image; downloaded at runtime)
+dataset_cache/
+
+# Logs
+*.log
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..b68dc53d5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,56 @@
+# GuideLLM with uv and CUDA 13; dataset downloaded at runtime (see NOTE below)
+FROM nvidia/cuda:13.0.0-base-ubuntu22.04
+
+# Configure timezone non-interactively
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    ffmpeg \
+    curl \
+    ca-certificates \
+    software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+    python3.12 \
+    python3.12-venv \
+    python3.12-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+
+# Create symlink for python command
+RUN ln -s /usr/bin/python3.12 /usr/bin/python
+
+# Create non-root user
+RUN useradd -m -s /bin/bash guidellm
+USER guidellm
+WORKDIR /home/guidellm
+
+# Copy source code
+COPY --chown=guidellm:guidellm . /home/guidellm/guidellm-src
+
+# Install guidellm with uv
+WORKDIR /home/guidellm/guidellm-src
+RUN uv venv --python python3.12 /home/guidellm/.venv && \
+    . /home/guidellm/.venv/bin/activate && \
+    uv pip install --no-cache -e .
+
+# NOTE: The dataset is downloaded at runtime to keep the image small;
+# the LibriSpeech dataset (~500MB) would make the image too large.
+# HuggingFace caches it under HF_HOME=/home/guidellm/.cache/huggingface
+
+# Add venv to PATH
+ENV PATH="/home/guidellm/.venv/bin:$PATH"
+ENV HF_HOME="/home/guidellm/.cache/huggingface"
+
+# Create results volume
+WORKDIR /home/guidellm
+VOLUME /results
+
+ENTRYPOINT ["guidellm"]
+CMD ["benchmark", "run"]
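Reviewer note: a typical build-and-run flow for this image might look like the following; the image tag, target URL, volume name, and flags are illustrative and not taken from the diff. Persisting the Hugging Face cache directory is optional but avoids re-downloading the dataset on every container start, since the image deliberately does not embed it.

    docker build -t guidellm .
    docker run --rm \
        -v "$PWD/results:/results" \
        -v guidellm-hf-cache:/home/guidellm/.cache/huggingface \
        guidellm benchmark run --target http://localhost:8000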
diff --git a/src/guidellm/backends/openai.py b/src/guidellm/backends/openai.py
index 57e2d95a6..d62badf86 100644
--- a/src/guidellm/backends/openai.py
+++ b/src/guidellm/backends/openai.py
@@ -52,6 +52,9 @@ def __init__(
         self,
         target: str,
         model: str = "",
+        api_key: str | None = None,
+        bearer_token: str | None = None,
+        headers: dict[str, str] | None = None,
         api_routes: dict[str, str] | None = None,
         response_handlers: dict[str, Any] | None = None,
         timeout: float = 60.0,
@@ -65,6 +68,9 @@ def __init__(
 
         :param target: Base URL of the OpenAI-compatible server
         :param model: Model identifier for generation requests
+        :param api_key: API key for authentication (used as Bearer token)
+        :param bearer_token: Bearer token for authentication (alternative to api_key)
+        :param headers: Additional headers to include in all requests
         :param api_routes: Custom API endpoint routes mapping
         :param response_handlers: Custom response handlers for different request types
         :param timeout: Request timeout in seconds
@@ -79,6 +85,29 @@ def __init__(
         self.target = target.rstrip("/").removesuffix("/v1")
         self.model = model
 
+        # Build default headers with authentication
+        from guidellm.settings import settings
+
+        self._default_headers: dict[str, str] = {}
+
+        # Merge headers from settings first (lowest priority)
+        if settings.openai.headers:
+            self._default_headers.update(settings.openai.headers)
+
+        # Add explicit headers parameter (medium priority)
+        if headers:
+            self._default_headers.update(headers)
+
+        # Resolve API key (highest priority): explicit param > settings
+        resolved_api_key = api_key or settings.openai.api_key
+        resolved_bearer_token = bearer_token or settings.openai.bearer_token
+
+        # Set Authorization header if we have credentials
+        if resolved_api_key:
+            self._default_headers["Authorization"] = f"Bearer {resolved_api_key}"
+        elif resolved_bearer_token:
+            self._default_headers["Authorization"] = f"Bearer {resolved_bearer_token}"
+
         # Store configuration
         self.api_routes = api_routes or {
             "health": "health",
@@ -184,7 +213,7 @@ async def available_models(self) -> list[str]:
             raise RuntimeError("Backend not started up for process.")
 
         target = f"{self.target}/{self.api_routes['models']}"
-        response = await self._async_client.get(target)
+        response = await self._async_client.get(target, headers=self._default_headers)
         response.raise_for_status()
 
         return [item["id"] for item in response.json()["data"]]
@@ -245,13 +274,19 @@ async def resolve(  # type: ignore[override]
             request.request_type, handler_overrides=self.response_handlers
         )
 
+        # Merge default headers with request-specific headers
+        merged_headers = {
+            **self._default_headers,
+            **(request.arguments.headers or {}),
+        }
+
         if not request.arguments.stream:
             request_info.timings.request_start = time.time()
             response = await self._async_client.request(
                 request.arguments.method or "POST",
                 request_url,
                 params=request.arguments.params,
-                headers=request.arguments.headers,
+                headers=merged_headers,
                 json=request_json,
                 data=request_data,
                 files=request_files,
@@ -269,7 +304,7 @@
                 request.arguments.method or "POST",
                 request_url,
                 params=request.arguments.params,
-                headers=request.arguments.headers,
+                headers=merged_headers,
                 json=request_json,
                 data=request_data,
                 files=request_files,
@@ -331,4 +366,9 @@ def _resolve_validate_kwargs(
         if "method" not in validate_kwargs:
            validate_kwargs["method"] = "GET"
 
+        # Include default headers (with auth) in validation request
+        if self._default_headers:
+            existing_headers = validate_kwargs.get("headers", {})
+            validate_kwargs["headers"] = {**self._default_headers, **existing_headers}
+
         return validate_kwargs
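Reviewer note: the precedence implemented above is settings.openai.headers (lowest), then the headers constructor argument, with an explicit api_key (or, failing that, bearer_token) claiming the Authorization slot; at request time, per-request headers override the defaults. A minimal standalone sketch of that merge order, using plain dicts rather than the guidellm classes (all values made up):

    # Construction-time defaults, mirroring __init__ above
    settings_headers = {"X-Org": "from-settings"}   # lowest priority
    ctor_headers = {"X-Org": "from-ctor"}           # overrides settings on collision
    api_key = "sk-example"                          # wins the Authorization slot

    default_headers: dict[str, str] = {}
    default_headers.update(settings_headers)
    default_headers.update(ctor_headers)
    if api_key:
        default_headers["Authorization"] = f"Bearer {api_key}"

    # Request-time merge, mirroring merged_headers in resolve()
    request_headers = {"X-Trace-Id": "abc123"}
    merged = {**default_headers, **request_headers}
    print(merged)
    # {'X-Org': 'from-ctor', 'Authorization': 'Bearer sk-example', 'X-Trace-Id': 'abc123'}

One consequence worth noting: a request that supplies its own Authorization header overrides the configured credentials, since the per-request dict is spread last.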
diff --git a/src/guidellm/data/deserializers/huggingface.py b/src/guidellm/data/deserializers/huggingface.py
index efe6882a5..95eb3a776 100644
--- a/src/guidellm/data/deserializers/huggingface.py
+++ b/src/guidellm/data/deserializers/huggingface.py
@@ -46,7 +46,20 @@ def __call__(
 
         load_error = None
 
-        if (
+        # Handle hf: prefix for explicit HuggingFace Hub datasets
+        if isinstance(data, str) and data.startswith("hf:"):
+            hub_id = data[3:]  # Remove "hf:" prefix
+            try:
+                return load_dataset(hub_id, **data_kwargs)
+            except (
+                FileNotFoundDatasetsError,
+                DatasetNotFoundError,
+                DataFilesNotFoundError,
+            ) as err:
+                load_error = err
+                # Fall through to raise error below
+
+        elif (
             isinstance(data, str | Path)
             and (path := Path(data)).exists()
             and ((path.is_file() and path.suffix == ".py") or path.is_dir())
@@ -71,15 +84,16 @@
             ) as err2:
                 load_error = err2
 
-        try:
-            # Handle dataset identifier from the Hugging Face Hub
-            return load_dataset(str(data), **data_kwargs)
-        except (
-            FileNotFoundDatasetsError,
-            DatasetNotFoundError,
-            DataFilesNotFoundError,
-        ) as err:
-            load_error = err
+        else:
+            try:
+                # Handle dataset identifier from the Hugging Face Hub
+                return load_dataset(str(data), **data_kwargs)
+            except (
+                FileNotFoundDatasetsError,
+                DatasetNotFoundError,
+                DataFilesNotFoundError,
+            ) as err:
+                load_error = err
 
         not_supported = DataNotSupportedError(
             "Unsupported data for HuggingFaceDatasetDeserializer, "