diff --git a/ai/select-algorithm-dotnet/CompareAll.cs b/ai/select-algorithm-dotnet/CompareAll.cs index 9eb9c75..562d8c1 100644 --- a/ai/select-algorithm-dotnet/CompareAll.cs +++ b/ai/select-algorithm-dotnet/CompareAll.cs @@ -30,11 +30,11 @@ public static void Run(AppConfiguration appConfig) Console.WriteLine(new string('=', 60)); // Use config values with env var overrides for compare-specific settings - var databaseName = appConfig.MongoDB.DatabaseName; + var databaseName = appConfig.DocumentDB.DatabaseName; var dataFile = appConfig.DataFiles.WithVectors; var vectorField = appConfig.Embedding.EmbeddedField; var dimensions = appConfig.Embedding.Dimensions; - var batchSize = appConfig.MongoDB.LoadBatchSize; + var batchSize = appConfig.DocumentDB.LoadBatchSize; var queryText = Environment.GetEnvironmentVariable("QUERY_TEXT") ?? "luxury hotel near the beach"; var topK = int.Parse(Environment.GetEnvironmentVariable("TOP_K") ?? "5"); @@ -65,7 +65,7 @@ public static void Run(AppConfiguration appConfig) Console.WriteLine("Embedding generated (reused for all searches)\n"); // Define 9 index configurations - var configs = BuildIndexConfigs(dimensions); + var configs = BuildIndexConfigs(); // Run each config sequentially: drop→create→wait→search // DocumentDB doesn't allow multiple vector indexes of the same kind on the same field @@ -77,7 +77,7 @@ public static void Run(AppConfiguration appConfig) DropVectorIndexes(collection, vectorField); // 2. Create this specific index - CreateIndex(collection, vectorField, config); + CreateIndex(collection, vectorField, dimensions, config); Console.WriteLine($" ✓ {config.Name} created"); // 3. 
Search with retries while the index becomes available @@ -139,7 +139,7 @@ public static void Run(AppConfiguration appConfig) } } - private static List BuildIndexConfigs(int dimensions) + private static List BuildIndexConfigs() { string[] metrics = ["COS", "L2", "IP"]; var configs = new List(); @@ -177,7 +177,7 @@ private static void DropVectorIndexes(IMongoCollection collection, catch { } } - private static void CreateIndex(IMongoCollection collection, string vectorField, IndexConfig config) + private static void CreateIndex(IMongoCollection collection, string vectorField, int dimensions, IndexConfig config) { // Drop existing index with same name if present try @@ -192,7 +192,7 @@ private static void CreateIndex(IMongoCollection collection, strin var cosmosSearchOptions = new BsonDocument { { "kind", config.Kind }, - { "dimensions", int.Parse(Environment.GetEnvironmentVariable("EMBEDDING_DIMENSIONS") ?? "1536") }, + { "dimensions", dimensions }, { "similarity", config.Similarity } }; diff --git a/ai/select-algorithm-dotnet/Models/Configuration.cs b/ai/select-algorithm-dotnet/Models/Configuration.cs index cbca25b..3b4cb09 100644 --- a/ai/select-algorithm-dotnet/Models/Configuration.cs +++ b/ai/select-algorithm-dotnet/Models/Configuration.cs @@ -3,7 +3,7 @@ namespace SelectAlgorithm.Models; public class AppConfiguration { public AzureOpenAIConfiguration AzureOpenAI { get; set; } = new(); - public MongoDBConfiguration MongoDB { get; set; } = new(); + public DocumentDBConfiguration DocumentDB { get; set; } = new(); public EmbeddingConfiguration Embedding { get; set; } = new(); public VectorSearchConfiguration VectorSearch { get; set; } = new(); public DataFilesConfiguration DataFiles { get; set; } = new(); @@ -15,7 +15,7 @@ public class AzureOpenAIConfiguration public string EmbeddingModel { get; set; } = "text-embedding-3-small"; } -public class MongoDBConfiguration +public class DocumentDBConfiguration { public string ClusterName { get; set; } = string.Empty; public 
string DatabaseName { get; set; } = "Hotels"; diff --git a/ai/select-algorithm-dotnet/README.md b/ai/select-algorithm-dotnet/README.md index 2621f77..b750f20 100644 --- a/ai/select-algorithm-dotnet/README.md +++ b/ai/select-algorithm-dotnet/README.md @@ -19,13 +19,13 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: 1. **Configure environment:** - The .NET sample uses `appsettings.json` for configuration. After deploying with `azd up`, you can export values: + The .NET sample uses `appsettings.json` for configuration. You can set values directly in `appsettings.json`, or if you deployed with `azd up`, export your provisioned values first: ```bash azd env get-values ``` - Then update `appsettings.json` with your Azure resource values. + > **Note:** `azd` is optional. You can skip it and edit `appsettings.json` manually with your Azure resource values. 2. Edit `appsettings.json` with your configuration: @@ -35,29 +35,43 @@ Demonstrates three vector index algorithms available in Azure DocumentDB: "EmbeddingModel": "text-embedding-3-small", "Endpoint": "https://.openai.azure.com" }, - "MongoDB": { + "DocumentDB": { "ClusterName": "", "DatabaseName": "Hotels", "LoadBatchSize": 100 }, "Embedding": { "EmbeddedField": "DescriptionVector", - "Dimensions": 1536, - "EmbeddingSizeBatch": 16 + "Dimensions": 1536 }, "DataFiles": { - "WithVectors": "../data/Hotels_Vector.json" + "WithVectors": "data/Hotels_Vector.json" } } ``` -3. Copy the shared data file: + > **Note:** .NET configuration also supports environment variable overrides. Use double-underscore (`__`) as the hierarchy separator: + > ```bash + > export DocumentDB__ClusterName=your-cluster-name + > export DocumentDB__DatabaseName=Hotels + > export AzureOpenAI__Endpoint=https://your-resource.openai.azure.com + > export AzureOpenAI__EmbeddingModel=text-embedding-3-small + > ``` + > Environment variables take precedence over `appsettings.json` values. + +3. 
Copy the shared data file into the local `data/` directory: + + ```bash + mkdir -p data && cp ../data/Hotels_Vector.json data/ + ``` + +4. Sign in to Azure for passwordless authentication: ```bash - cp ../data/Hotels_Vector.json . + az login ``` -4. Restore packages: +5. Restore packages: ```bash dotnet restore @@ -75,15 +89,14 @@ dotnet run | Setting (appsettings.json) | Default | Description | |---------------------------|---------|-------------| -| `MongoDB:ClusterName` | (required) | DocumentDB cluster name | +| `DocumentDB:ClusterName` | (required) | DocumentDB cluster name | | `AzureOpenAI:Endpoint` | (required) | Azure OpenAI endpoint | -| `AzureOpenAI:EmbeddingModel` | (required) | Embedding model deployment name | -| `DataFiles:WithVectors` | `../data/Hotels_Vector.json` | Path to vectors JSON file | +| `AzureOpenAI:EmbeddingModel` | `text-embedding-3-small` | Embedding model deployment name | +| `DataFiles:WithVectors` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `Embedding:EmbeddedField` | `DescriptionVector` | Field name containing embeddings | | `Embedding:Dimensions` | `1536` | Vector dimensions | -| `MongoDB:DatabaseName` | `Hotels` | Target database name | -| `MongoDB:LoadBatchSize` | `100` | Batch size for data loading | -| `Embedding:EmbeddingSizeBatch` | `16` | Batch size for embedding requests | +| `DocumentDB:DatabaseName` | `Hotels` | Target database name | +| `DocumentDB:LoadBatchSize` | `100` | Batch size for data loading | **Additional environment variables for compare mode:** @@ -91,7 +104,6 @@ dotnet run |----------|---------|-------------| | `QUERY_TEXT` | `luxury hotel near the beach` | Search query text | | `TOP_K` | `5` | Number of results per search | -| `VERBOSE` | `false` | Show detailed per-result output | ## How It Works diff --git a/ai/select-algorithm-dotnet/Utils.cs b/ai/select-algorithm-dotnet/Utils.cs index 62590ad..01d97a2 100644 --- a/ai/select-algorithm-dotnet/Utils.cs +++ 
b/ai/select-algorithm-dotnet/Utils.cs @@ -34,9 +34,9 @@ public static class Utils { public static IMongoClient GetMongoClientPasswordless(AppConfiguration config) { - var clusterName = config.MongoDB.ClusterName; + var clusterName = config.DocumentDB.ClusterName; if (string.IsNullOrEmpty(clusterName)) - throw new InvalidOperationException("MongoDB:ClusterName is required in appsettings.json"); + throw new InvalidOperationException("DocumentDB:ClusterName is required in appsettings.json"); var credential = new DefaultAzureCredential(); diff --git a/ai/select-algorithm-dotnet/appsettings.json b/ai/select-algorithm-dotnet/appsettings.json index 68ee696..b6634fb 100644 --- a/ai/select-algorithm-dotnet/appsettings.json +++ b/ai/select-algorithm-dotnet/appsettings.json @@ -1,8 +1,8 @@ { - "MongoDB": { + "DocumentDB": { "DatabaseName": "Hotels", "ClusterName": "", - "LoadBatchSize": 50 + "LoadBatchSize": 100 }, "VectorSearch": { "Similarity": "", @@ -14,11 +14,10 @@ "EmbeddingModel": "text-embedding-3-small" }, "DataFiles": { - "WithVectors": "../data/Hotels_Vector.json" + "WithVectors": "data/Hotels_Vector.json" }, "Embedding": { "EmbeddedField": "DescriptionVector", - "Dimensions": 1536, - "EmbeddingSizeBatch": 16 + "Dimensions": 1536 } } diff --git a/ai/select-algorithm-dotnet/quickstart.md b/ai/select-algorithm-dotnet/quickstart.md index e94c2da..140a83d 100644 --- a/ai/select-algorithm-dotnet/quickstart.md +++ b/ai/select-algorithm-dotnet/quickstart.md @@ -149,7 +149,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ ```bash export AzureOpenAI__Endpoint="https://.openai.azure.com" export AzureOpenAI__EmbeddingModel="text-embedding-3-small" - export MongoDB__ClusterName="" + export DocumentDB__ClusterName="" export DataFiles__WithVectors="data/Hotels_Vector.json" export AZURE_TENANT_ID="" ``` @@ -159,7 +159,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ ```powershell 
$env:AzureOpenAI__Endpoint="https://.openai.azure.com" $env:AzureOpenAI__EmbeddingModel="text-embedding-3-small" - $env:MongoDB__ClusterName="" + $env:DocumentDB__ClusterName="" $env:DataFiles__WithVectors="data/Hotels_Vector.json" $env:AZURE_TENANT_ID="" ``` @@ -171,7 +171,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ - ``: Your Azure DocumentDB cluster name - ``: Your Microsoft Entra tenant ID - These environment variables override the matching values in `appsettings.json`. For example, `MongoDB__ClusterName` overrides `MongoDB:ClusterName` and `AzureOpenAI__Endpoint` overrides `AzureOpenAI:Endpoint`. + These environment variables override the matching values in `appsettings.json`. For example, `DocumentDB__ClusterName` overrides `DocumentDB:ClusterName` and `AzureOpenAI__Endpoint` overrides `AzureOpenAI:Endpoint`. You should always prefer passwordless authentication. For more information on setting up managed identity and the full range of your authentication options, see [Authenticate .NET apps to Azure services by using the Azure SDK for .NET](/dotnet/azure/sdk/authentication). @@ -205,7 +205,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ "Endpoint": "https://.openai.azure.com", "EmbeddingModel": "text-embedding-3-small" }, - "MongoDB": { + "DocumentDB": { "ClusterName": "", "DatabaseName": "Hotels", "LoadBatchSize": 100 @@ -226,7 +226,7 @@ Find the [sample code](https://github.com/Azure-Samples/documentdb-samples/tree/ } ``` - You can keep placeholder values in `appsettings.json` and override them at runtime with environment variables such as `AzureOpenAI__Endpoint` and `MongoDB__ClusterName`. + You can keep placeholder values in `appsettings.json` and override them at runtime with environment variables such as `AzureOpenAI__Endpoint` and `DocumentDB__ClusterName`. 
## Create code files diff --git a/ai/select-algorithm-go/README.md b/ai/select-algorithm-go/README.md index f03828e..18d6f86 100644 --- a/ai/select-algorithm-go/README.md +++ b/ai/select-algorithm-go/README.md @@ -20,36 +20,25 @@ This sample demonstrates how to compare different vector search algorithms (IVF, 2. **Configure environment variables:** - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + After deploying with `azd up`, use the provisioned output values to set these required environment variables in your shell: ```bash - azd env get-values > .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster-name + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 ``` - Alternatively, copy the example and fill in values manually: +3. **Copy the shared data file** into the local `data/` directory: ```bash - cp .env.example .env + mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` - Required variables: - ```env - DOCUMENTDB_CLUSTER_NAME=your-cluster-name - AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com - AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small - AZURE_DOCUMENTDB_DATABASENAME=Hotels - DATA_FILE_WITH_VECTORS=../data/Hotels_Vector.json - EMBEDDED_FIELD=DescriptionVector - EMBEDDING_DIMENSIONS=1536 - ``` - -3. **Copy the shared data file** into this directory: - - ```bash - cp ../data/Hotels_Vector.json . - ``` - - The `DATA_FILE_WITH_VECTORS` env var defaults to `../data/Hotels_Vector.json`. + The `DATA_FILE_WITH_VECTORS` env var defaults to `data/Hotels_Vector.json`. 4. **Install dependencies**: @@ -116,7 +105,7 @@ go run ./src/... 
| `AZURE_OPENAI_EMBEDDING_ENDPOINT` | *(required)* | Azure OpenAI endpoint | | `AZURE_OPENAI_EMBEDDING_MODEL` | `text-embedding-3-small` | Embedding model name | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Database name | -| `DATA_FILE_WITH_VECTORS` | `../data/Hotels_Vector.json` | Path to data file | +| `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to data file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Embedding vector dimensions | | `LOAD_SIZE_BATCH` | `100` | Batch size for data insertion | @@ -147,7 +136,6 @@ go run ./src/... ``` select-algorithm-go/ -├── .env.example # Environment variable template ├── go.mod # Go module dependencies ├── go.sum # Go module checksums ├── output/ # Sample output files diff --git a/ai/select-algorithm-java/README.md b/ai/select-algorithm-java/README.md index 2449f40..ca2648c 100644 --- a/ai/select-algorithm-java/README.md +++ b/ai/select-algorithm-java/README.md @@ -14,30 +14,22 @@ This sample demonstrates how to compare all three vector search index algorithms 1. ### Configure environment variables - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + After deploying with `azd up`, use the provisioned output values to set the required environment variables in your terminal: ```bash - azd env get-values > .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster-name + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 ``` - This creates a `.env` file at the repository root with the connection strings and endpoints needed to run the sample. - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env - ``` - -2. 
Update `.env` with your Azure resource details (if not using `azd`): - - `DOCUMENTDB_CLUSTER_NAME` — your DocumentDB cluster name - - `AZURE_OPENAI_EMBEDDING_ENDPOINT` — your Azure OpenAI endpoint - - `AZURE_OPENAI_EMBEDDING_MODEL` — deployment name (e.g., `text-embedding-3-small`) - - `DATA_FILE_WITH_VECTORS` — path to the pre-computed vectors JSON file - -3. Copy the shared data file: +2. Copy the shared data file into the local `data/` directory: ```bash - cp ../data/Hotels_Vector.json . + mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` ## Build @@ -54,16 +46,10 @@ Compare all 9 algorithm × similarity combinations: mvn exec:java -Pcompare ``` -Or via the `ALGORITHM` environment variable: - -```bash -ALGORITHM=compare mvn exec:java -``` - On Windows (PowerShell): ```powershell -$env:ALGORITHM="compare"; mvn exec:java +mvn exec:java -Pcompare ``` ## Algorithms @@ -80,18 +66,16 @@ $env:ALGORITHM="compare"; mvn exec:java |----------|---------|-------------| | `DOCUMENTDB_CLUSTER_NAME` | (required) | DocumentDB cluster name | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | (required) | Azure OpenAI endpoint | -| `AZURE_OPENAI_EMBEDDING_MODEL` | (required) | Embedding model deployment name | -| `DATA_FILE_WITH_VECTORS` | `../data/Hotels_Vector.json` | Path to vectors JSON file | +| `AZURE_OPENAI_EMBEDDING_MODEL` | `text-embedding-3-small` | Embedding model deployment name | +| `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field name containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Vector dimensions | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | | `LOAD_SIZE_BATCH` | `100` | Batch size for data loading | -| `EMBEDDING_SIZE_BATCH` | `16` | Batch size for embedding requests | -| `ALGORITHM` | (empty = all) | Which algorithm to run | -| `SIMILARITY` | (empty = all) | Similarity metric: `COS`, `L2`, `IP` | | `QUERY_TEXT` | `luxury hotel near the beach` | 
Search query text | | `TOP_K` | `5` | Number of results per search | -| `VERBOSE` | `false` | Print detailed per-index results | + +`CompareAll.java` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. ## Authentication diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java index 66281ed..ee8733b 100644 --- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/CompareAll.java @@ -55,7 +55,8 @@ public static void run() { collection.drop(); System.out.println(" Collection reset."); - Utils.insertData(collection, data, 100); + int batchSize = Integer.parseInt(System.getenv().getOrDefault("LOAD_SIZE_BATCH", "100")); + Utils.insertData(collection, data, batchSize); // Generate ONE embedding for the query (reused for all 9 searches) OpenAIClient aiClient = Utils.getOpenAIClient(); @@ -113,12 +114,11 @@ public static void run() { int successCount = (int) results.stream().filter(r -> !r.top1Name().equals("(failed)")).count(); if (successCount == 0) { - System.out.println("\n❌ All 9 comparisons failed — no algorithm returned results."); - System.exit(1); - } else { - System.out.printf("%nSummary: %d succeeded, %d failed%n", successCount, 9 - successCount); + throw new IllegalStateException("All 9 comparisons failed — no algorithm returned results."); } + System.out.printf("%nSummary: %d succeeded, %d failed%n", successCount, 9 - successCount); + // Cleanup: drop the comparison collection System.out.println("\n Cleanup: dropping comparison collection..."); collection.drop(); diff --git a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java 
b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java index b8b761e..641dcb5 100644 --- a/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java +++ b/ai/select-algorithm-java/src/main/java/com/azure/documentdb/selectalgorithm/Utils.java @@ -7,7 +7,6 @@ import com.azure.core.credential.AccessToken; import com.azure.identity.DefaultAzureCredential; import com.azure.identity.DefaultAzureCredentialBuilder; -import com.mongodb.ConnectionString; import com.mongodb.MongoClientSettings; import com.mongodb.MongoCredential; import com.mongodb.client.MongoClient; @@ -39,8 +38,7 @@ public static MongoClient getMongoClient() { throw new IllegalStateException("DOCUMENTDB_CLUSTER_NAME environment variable is required"); } - String connectionUri = String.format( - "mongodb+srv://%s.global.mongocluster.cosmos.azure.com/", clusterName); + String clusterHost = String.format("%s.global.mongocluster.cosmos.azure.com", clusterName); // Use custom OIDC callback with DefaultAzureCredential // This chains through CLI, managed identity, etc. @@ -56,7 +54,8 @@ public static MongoClient getMongoClient() { }); MongoClientSettings settings = MongoClientSettings.builder() - .applyConnectionString(new ConnectionString(connectionUri)) + .applyToClusterSettings(builder -> builder.srvHost(clusterHost)) + .applyToSslSettings(builder -> builder.enabled(true)) .credential(mongoCredential) .retryWrites(false) .build(); diff --git a/ai/select-algorithm-python/README.md b/ai/select-algorithm-python/README.md index 1fe7746..b89a800 100644 --- a/ai/select-algorithm-python/README.md +++ b/ai/select-algorithm-python/README.md @@ -34,30 +34,27 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each 1. 
### Configure environment variables - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + After deploying with `azd up`, use the provisioned output values to set the required environment variables in your terminal: ```bash - azd env get-values > .env - ``` - - This creates a `.env` file at the repository root with the connection strings and endpoints needed to run the sample. - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster-name + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 ``` 2. Install dependencies: ```bash - cd src - pip install -r ../requirements.txt + pip install -r requirements.txt ``` -3. Copy the shared data file: +3. Copy the shared data file into the local `data/` directory: ```bash - cp ../data/Hotels_Vector.json . + mkdir -p data && cp ../data/Hotels_Vector.json data/ ``` 4. Ensure you're logged in to Azure: @@ -70,8 +67,7 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB. Each Compare all 9 combinations (3 algorithms × 3 similarity metrics) in a single invocation: ```bash -cd src -python compare_all.py +python src/compare_all.py ``` The script creates a single `hotels` collection, loads data once, then for each of the 9 algorithm/metric combinations: creates the index → searches → drops the index. DocumentDB only allows one vector index per kind per field, so indexes are created sequentially. 
@@ -82,15 +78,14 @@ The script creates a single `hotels` collection, loads data once, then for each |----------|---------|-------------| | `DOCUMENTDB_CLUSTER_NAME` | (required) | DocumentDB cluster name | | `AZURE_OPENAI_EMBEDDING_ENDPOINT` | (required) | Azure OpenAI endpoint | -| `AZURE_OPENAI_EMBEDDING_MODEL` | (required) | Embedding model deployment name | -| `DATA_FILE_WITH_VECTORS` | `../data/Hotels_Vector.json` | Path to vectors JSON file | +| `AZURE_OPENAI_EMBEDDING_MODEL` | `text-embedding-3-small` | Embedding model deployment name | +| `AZURE_OPENAI_EMBEDDING_API_VERSION` | `2023-05-15` | Azure OpenAI API version | +| `DATA_FILE_WITH_VECTORS` | `data/Hotels_Vector.json` | Path to vectors JSON file | | `EMBEDDED_FIELD` | `DescriptionVector` | Field name containing embeddings | | `EMBEDDING_DIMENSIONS` | `1536` | Vector dimensions | | `AZURE_DOCUMENTDB_DATABASENAME` | `Hotels` | Target database name | | `LOAD_SIZE_BATCH` | `100` | Batch size for data loading | -| `EMBEDDING_SIZE_BATCH` | `16` | Batch size for embedding requests | -| `ALGORITHM` | (empty = all) | Which algorithm to run | -| `SIMILARITY` | (empty = all) | Similarity metric: `COS`, `L2`, `IP` | | `QUERY_TEXT` | `luxury hotel near the beach` | Search query text | | `TOP_K` | `5` | Number of results per search | -| `VERBOSE` | `false` | Show all k results per combo | + +`compare_all.py` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. diff --git a/ai/select-algorithm-typescript/README.md b/ai/select-algorithm-typescript/README.md index 73414a8..302df29 100644 --- a/ai/select-algorithm-typescript/README.md +++ b/ai/select-algorithm-typescript/README.md @@ -25,18 +25,18 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using 3. 
**Configure environment variables:** - After deploying with `azd up`, create a `.env` file with your provisioned resource values: + This sample reads configuration from environment variables. Export them in your shell before running: ```bash - azd env get-values > .env - ``` - - This creates a `.env` file in the project folder with the connection strings and endpoints needed to run the sample. - - Alternatively, copy the example and fill in values manually: - - ```bash - cp .env.example .env + export DOCUMENTDB_CLUSTER_NAME=your-cluster + export AZURE_OPENAI_EMBEDDING_ENDPOINT=https://your-resource.openai.azure.com + export AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small + export AZURE_OPENAI_EMBEDDING_API_VERSION=2023-05-15 + export AZURE_DOCUMENTDB_DATABASENAME=Hotels + export DATA_FILE_WITH_VECTORS=data/Hotels_Vector.json + export EMBEDDED_FIELD=DescriptionVector + export EMBEDDING_DIMENSIONS=1536 + export LOAD_SIZE_BATCH=100 ``` | Variable | Description | @@ -50,17 +50,19 @@ Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB using | `EMBEDDED_FIELD` | Field name containing the vector (default: `DescriptionVector`) | | `EMBEDDING_DIMENSIONS` | Vector dimensions (default: `1536`) | | `LOAD_SIZE_BATCH` | Batch size for data insertion | - | `SIMILARITY` | Similarity metric: `COS`, `L2`, or `IP` | -5. **Copy the shared data file** into this directory: + `compare-all.ts` always runs all 9 algorithm/metric combinations. It does not read `ALGORITHM` or `SIMILARITY` environment variables. + + 4. **Create a `data/` directory and copy the data file:** ```bash - cp ../data/Hotels_Vector.json . + mkdir -p data + cp ../data/Hotels_Vector.json data/ ``` - The `DATA_FILE_WITH_VECTORS` env var defaults to `../data/Hotels_Vector.json`. + The `DATA_FILE_WITH_VECTORS` env var defaults to `data/Hotels_Vector.json`. -6. **Build the project:** +5. 
**Build the project:** ```bash npm run build diff --git a/ai/select-algorithm-typescript/package.json b/ai/select-algorithm-typescript/package.json index 5c1f24a..8a9d155 100644 --- a/ai/select-algorithm-typescript/package.json +++ b/ai/select-algorithm-typescript/package.json @@ -4,7 +4,6 @@ "description": "Compare IVF, HNSW, and DiskANN vector index algorithms in Azure DocumentDB", "type": "module", "scripts": { - "env:init": "azd env get-values > .env", "build": "tsc", "start": "node dist/compare-all.js" }, diff --git a/ai/select-algorithm-typescript/src/utils.ts b/ai/select-algorithm-typescript/src/utils.ts index 09ec3dd..5f99337 100644 --- a/ai/select-algorithm-typescript/src/utils.ts +++ b/ai/select-algorithm-typescript/src/utils.ts @@ -20,9 +20,14 @@ export function getConfig() { export const AzureIdentityTokenCallback = async (params: OIDCCallbackParams, credential: TokenCredential): Promise => { const tokenResponse: AccessToken | null = await credential.getToken(['https://ossrdbms-aad.database.windows.net/.default']); + + if (!tokenResponse || !tokenResponse.token) { + throw new Error('Failed to acquire token'); + } + return { - accessToken: tokenResponse?.token || '', - expiresInSeconds: Math.floor(((tokenResponse?.expiresOnTimestamp || 0) - Date.now()) / 1000) + accessToken: tokenResponse.token, + expiresInSeconds: Math.floor((tokenResponse.expiresOnTimestamp - Date.now()) / 1000) }; };