diff --git a/src/index.ts b/src/index.ts index c40ab2be..06aef230 100644 --- a/src/index.ts +++ b/src/index.ts @@ -23,6 +23,7 @@ export { JobResponse, RawText, RagMetadata, + InferenceParameters, DataSchema, } from "./v2/index.js"; -export type { InferenceParameters, PollingOptions } from "./v2/index.js"; +export type { PollingOptions } from "./v2/index.js"; diff --git a/src/v2/client.ts b/src/v2/client.ts index 53e17c29..698b9ac6 100644 --- a/src/v2/client.ts +++ b/src/v2/client.ts @@ -3,62 +3,10 @@ import { Dispatcher } from "undici"; import { InputSource } from "@/input/index.js"; import { errorHandler } from "@/errors/handler.js"; import { LOG_LEVELS, logger } from "@/logger.js"; -import { StringDict } from "@/parsing/stringDict.js"; import { ErrorResponse, InferenceResponse, JobResponse } from "./parsing/index.js"; import { MindeeApiV2 } from "./http/mindeeApiV2.js"; import { MindeeHttpErrorV2 } from "./http/errors.js"; -import { PollingOptions, DataSchema } from "./client/index.js"; -import { setAsyncParams } from "./client/pollingOptions.js"; - -/** - * Parameters accepted by the asynchronous **inference** v2 endpoint. - * - * All fields are optional except `modelId`. - * - * @category ClientV2 - * @example - * const params = { - * modelId: "YOUR_MODEL_ID", - * rag: true, - * alias: "YOUR_ALIAS", - * webhookIds: ["YOUR_WEBHOOK_ID_1", "YOUR_WEBHOOK_ID_2"], - * pollingOptions: { - * initialDelaySec: 2, - * delaySec: 1.5, - * } - * }; - */ -export interface InferenceParameters { - /** Model ID to use for the inference. **Required** */ - modelId: string; - /** Use Retrieval-Augmented Generation during inference. */ - rag?: boolean; - /** Extract the entire text from the document as strings, and fill the `rawText` attribute. */ - rawText?: boolean; - /** Calculate bounding box polygons for values, and fill the `locations` attribute of fields. */ - polygon?: boolean; - /** Calculate confidence scores for values, and fill the `confidence` attribute of fields. 
- * Useful for automation.*/ - confidence?: boolean; - /** Use an alias to link the file to your own DB. If empty, no alias will be used. */ - alias?: string; - /** Additional text context used by the model during inference. - * *Not recommended*, for specific use only. */ - textContext?: string; - /** Webhook IDs to call after all processing is finished. - * If empty, no webhooks will be used. */ - webhookIds?: string[]; - /** Client-side polling configuration (see {@link PollingOptions}). */ - pollingOptions?: PollingOptions; - /** By default, the file is closed once the upload is finished. - * Set to `false` to keep it open. */ - closeFile?: boolean; - /** - * Dynamic changes to the data schema of the model for this inference. - * Not recommended, for specific use only. - */ - dataSchema?: DataSchema|StringDict|string; -} +import { InferenceParameters } from "./client/index.js"; /** * Options for the V2 Mindee Client. @@ -111,18 +59,6 @@ export class Client { logger.debug("Client V2 Initialized"); } - /** - * Checks the Data Schema. - * @param params Input Inference parameters. - */ - validateDataSchema(params: InferenceParameters): void { - if (params.dataSchema !== undefined && params.dataSchema !== null){ - if (!(params.dataSchema instanceof DataSchema)){ - params.dataSchema = new DataSchema(params.dataSchema); - } - } - } - /** * Send the document to an asynchronous endpoint and return its ID in the queue. * @param inputSource file or URL to parse. @@ -132,15 +68,18 @@ export class Client { */ async enqueueInference( inputSource: InputSource, - params: InferenceParameters + params: InferenceParameters| ConstructorParameters<typeof InferenceParameters>[0] ): Promise<JobResponse> { if (inputSource === undefined) { throw new Error("The 'enqueue' function requires an input document."); } - this.validateDataSchema(params); + const inferenceParams = params instanceof InferenceParameters + ? 
params + : new InferenceParameters(params); + await inputSource.init(); - return await this.mindeeApi.reqPostInferenceEnqueue(inputSource, params); + return await this.mindeeApi.reqPostInferenceEnqueue(inputSource, inferenceParams); } /** @@ -183,9 +122,14 @@ export class Client { */ async enqueueAndGetInference( inputSource: InputSource, - params: InferenceParameters + params: InferenceParameters| ConstructorParameters<typeof InferenceParameters>[0] ): Promise<InferenceResponse> { - const validatedAsyncParams = setAsyncParams(params.pollingOptions); + const inferenceParams = params instanceof InferenceParameters + ? params + : new InferenceParameters(params); + + const pollingOptions = inferenceParams.getValidatedPollingOptions(); + - const enqueueResponse: JobResponse = await this.enqueueInference(inputSource, params); + const enqueueResponse: JobResponse = await this.enqueueInference(inputSource, inferenceParams); if (enqueueResponse.job.id === undefined || enqueueResponse.job.id.length === 0) { logger.error(`Failed enqueueing:\n${enqueueResponse.getRawHttp()}`); @@ -196,10 +140,14 @@ export class Client { `Successfully enqueued document with job id: ${queueId}.` ); - await setTimeout(validatedAsyncParams.initialDelaySec * 1000, undefined, validatedAsyncParams.initialTimerOptions); + await setTimeout( + pollingOptions.initialDelaySec * 1000, + undefined, + pollingOptions.initialTimerOptions + ); let retryCounter: number = 1; let pollResults: JobResponse = await this.getJob(queueId); - while (retryCounter < validatedAsyncParams.maxRetries) { + while (retryCounter < pollingOptions.maxRetries) { if (pollResults.job.status === "Failed") { break; } @@ -208,10 +156,14 @@ export class Client { } logger.debug( `Polling server for parsing result with queueId: ${queueId}. -Attempt no. ${retryCounter} of ${validatedAsyncParams.maxRetries}. +Attempt no. ${retryCounter} of ${pollingOptions.maxRetries}. 
Job status: ${pollResults.job.status}.` ); - await setTimeout(validatedAsyncParams.delaySec * 1000, undefined, validatedAsyncParams.recurringTimerOptions); + await setTimeout( + pollingOptions.delaySec * 1000, + undefined, + pollingOptions.recurringTimerOptions + ); pollResults = await this.getJob(queueId); retryCounter++; } @@ -221,7 +173,7 @@ Job status: ${pollResults.job.status}.` } throw Error( "Asynchronous parsing request timed out after " + - validatedAsyncParams.delaySec * retryCounter + + pollingOptions.delaySec * retryCounter + " seconds" ); } diff --git a/src/v2/client/dataSchema.ts b/src/v2/client/dataSchema.ts index 19787dc2..90579c9f 100644 --- a/src/v2/client/dataSchema.ts +++ b/src/v2/client/dataSchema.ts @@ -6,43 +6,35 @@ export class DataSchemaField { * Display name for the field, also impacts inference results. */ public title: string; - /** * Name of the field in the data schema. */ public name: string; - /** * Whether this field can contain multiple values. */ public isArray: boolean; - /** * Data type of the field. */ public type: string; - /** * Allowed values when type is `classification`. Leave empty for other types. */ public classificationValues?: Array; - /** * Whether to remove duplicate values in the array. * Only applicable if `is_array` is True. */ public uniqueValues?: boolean; - /** * Detailed description of what this field represents. */ public description?: string; - /** * Optional extraction guidelines. */ public guidelines?: string; - /** * Subfields when type is `nested_object`. Leave empty for other types. 
*/
diff --git a/src/v2/client/index.ts b/src/v2/client/index.ts
index c31c7b5d..a871d3bf 100644
--- a/src/v2/client/index.ts
+++ b/src/v2/client/index.ts
@@ -1,2 +1,3 @@
 export { DataSchema } from "./dataSchema.js";
 export type { PollingOptions, ValidatedPollingOptions } from "./pollingOptions.js";
+export { InferenceParameters } from "./inferenceParameters.js";
diff --git a/src/v2/client/inferenceParameters.ts b/src/v2/client/inferenceParameters.ts
new file mode 100644
index 00000000..13687a30
--- /dev/null
+++ b/src/v2/client/inferenceParameters.ts
@@ -0,0 +1,144 @@
+import { StringDict } from "@/parsing/stringDict.js";
+import { PollingOptions, ValidatedPollingOptions } from "./pollingOptions.js";
+import { DataSchema } from "./dataSchema.js";
+
+/**
+ * Parameters accepted by the asynchronous **inference** v2 endpoint.
+ *
+ * All fields are optional except `modelId`.
+ *
+ * @category ClientV2
+ * @example
+ * const params = {
+ *   modelId: "YOUR_MODEL_ID",
+ *   rag: true,
+ *   alias: "YOUR_ALIAS",
+ *   webhookIds: ["YOUR_WEBHOOK_ID_1", "YOUR_WEBHOOK_ID_2"],
+ *   pollingOptions: {
+ *     initialDelaySec: 2,
+ *     delaySec: 1.5,
+ *   }
+ * };
+ */
+export class InferenceParameters {
+  /**
+   * Model ID to use for the inference. **Required.**
+   */
+  modelId: string;
+  /**
+   * Use Retrieval-Augmented Generation during inference.
+   */
+  rag?: boolean;
+  /**
+   * Extract the entire text from the document as strings, and fill the `rawText` attribute.
+   */
+  rawText?: boolean;
+  /**
+   * Calculate bounding box polygons for values, and fill the `locations` attribute of fields.
+   */
+  polygon?: boolean;
+  /**
+   * Calculate confidence scores for values, and fill the `confidence` attribute of fields.
+   * Useful for automation.
+   */
+  confidence?: boolean;
+  /**
+   * Use an alias to link the file to your own DB.
+   * If empty, no alias will be used.
+   */
+  alias?: string;
+  /**
+   * Additional text context used by the model during inference.
+   * *Not recommended*, for specific use only.
+   */
+  textContext?: string;
+  /**
+   * Webhook IDs to call after all processing is finished.
+   * If empty, no webhooks will be used.
+   */
+  webhookIds?: string[];
+  /**
+   * Client-side polling configuration (see {@link PollingOptions}).
+   */
+  pollingOptions?: PollingOptions;
+  /**
+   * By default, the file is closed once the upload is finished.
+   * Set to `false` to keep it open.
+   */
+  closeFile?: boolean;
+  /**
+   * Dynamic changes to the data schema of the model for this inference.
+   * Not recommended, for specific use only.
+   */
+  dataSchema?: DataSchema | StringDict | string;
+
+  constructor(params: {
+    modelId: string;
+    rag?: boolean;
+    rawText?: boolean;
+    polygon?: boolean;
+    confidence?: boolean;
+    alias?: string;
+    textContext?: string;
+    webhookIds?: string[];
+    pollingOptions?: PollingOptions;
+    closeFile?: boolean;
+    dataSchema?: DataSchema | StringDict | string;
+  }) {
+    this.modelId = params.modelId;
+    this.rag = params.rag;
+    this.rawText = params.rawText;
+    this.polygon = params.polygon;
+    this.confidence = params.confidence;
+    this.alias = params.alias;
+    this.textContext = params.textContext;
+    this.webhookIds = params.webhookIds;
+    this.closeFile = params.closeFile;
+    this.pollingOptions = params.pollingOptions;
+
+    if (params.dataSchema !== undefined && params.dataSchema !== null) {
+      if (!(params.dataSchema instanceof DataSchema)){
+        this.dataSchema = new DataSchema(params.dataSchema);
+      } else {
+        this.dataSchema = params.dataSchema;
+      }
+    }
+  }
+
+  /**
+   * Builds the effective polling configuration for this inference.
+   *
+   * Any field left unset falls back to its default (`delaySec` = 1.5, `initialDelaySec` = 2,
+   * `maxRetries` = 80); every other provided option is kept as-is.
+   *
+   * @returns The polling options with `delaySec`, `initialDelaySec` and `maxRetries` always set.
+   * @throws Error if one of these three values is 0 or NaN, or lower than its minimum
+   * (1 second for both delays, 2 for `maxRetries`).
+   */
+  getValidatedPollingOptions(): ValidatedPollingOptions {
+    const minDelaySec = 1;
+    const minInitialDelay = 1;
+    const minRetries = 2;
+    // Defaults are applied per field so that partially filled options stay valid.
+    const validated: ValidatedPollingOptions = {
+      ...this.pollingOptions,
+      delaySec: this.pollingOptions?.delaySec ?? 1.5,
+      initialDelaySec: this.pollingOptions?.initialDelaySec ?? 2,
+      maxRetries: this.pollingOptions?.maxRetries ?? 80,
+    };
+    // Falsy values (0, NaN) are rejected up front: NaN would pass every `<` check below.
+    if (!validated.delaySec || !validated.initialDelaySec || !validated.maxRetries) {
+      throw Error("Invalid polling options.");
+    }
+    if (validated.delaySec < minDelaySec) {
+      throw Error(`Cannot set auto-parsing delay to less than ${minDelaySec} second(s).`);
+    }
+    if (validated.initialDelaySec < minInitialDelay) {
+      throw Error(`Cannot set initial parsing delay to less than ${minInitialDelay} second(s).`);
+    }
+    if (validated.maxRetries < minRetries) {
+      throw Error(`Cannot set retry to less than ${minRetries}.`);
+    }
+    return validated;
+  }
+}
diff --git a/src/v2/client/pollingOptions.ts b/src/v2/client/pollingOptions.ts
index 2ad4a5c8..0a99a5e0 100644
--- a/src/v2/client/pollingOptions.ts
+++ b/src/v2/client/pollingOptions.ts
@@ -1,5 +1,5 @@
 /**
- * Parameters for the internal polling loop in {@link ClientV2.enqueueAndGetInference | enqueueAndGetInference()}.
+ * Parameters for the internal polling loop in {@link v2.Client.enqueueAndGetInference | enqueueAndGetInference()}.
  *
  * Default behavior:
  * - `initialDelaySec` = 2s
@@ -24,7 +24,6 @@
  *
  * const inference = await client.enqueueAndGetInference(inputDoc, params);
  */
-
 export interface PollingOptions {
   /** Number of seconds to wait *before the first poll*. */
   initialDelaySec?: number;
@@ -49,41 +48,3 @@ export interface ValidatedPollingOptions extends PollingOptions {
   delaySec: number;
   maxRetries: number;
 }
-
-/**
- * Checks the values for asynchronous parsing. Returns their corrected value if they are undefined.
- * @param asyncParams parameters related to asynchronous parsing - * @returns A valid `AsyncOptions`. - */ -export function setAsyncParams(asyncParams: PollingOptions | undefined = undefined): ValidatedPollingOptions { - const minDelaySec = 1; - const minInitialDelay = 1; - const minRetries = 2; - let newAsyncParams: PollingOptions; - if (asyncParams === undefined) { - newAsyncParams = { - delaySec: 1.5, - initialDelaySec: 2, - maxRetries: 80 - }; - } else { - newAsyncParams = { ...asyncParams }; - if ( - !newAsyncParams.delaySec || - !newAsyncParams.initialDelaySec || - !newAsyncParams.maxRetries - ) { - throw Error("Invalid polling options."); - } - if (newAsyncParams.delaySec < minDelaySec) { - throw Error(`Cannot set auto-parsing delay to less than ${minDelaySec} second(s).`); - } - if (newAsyncParams.initialDelaySec < minInitialDelay) { - throw Error(`Cannot set initial parsing delay to less than ${minInitialDelay} second(s).`); - } - if (newAsyncParams.maxRetries < minRetries) { - throw Error(`Cannot set retry to less than ${minRetries}.`); - } - } - return newAsyncParams as ValidatedPollingOptions; -} diff --git a/src/v2/http/mindeeApiV2.ts b/src/v2/http/mindeeApiV2.ts index 201f750c..3f89533e 100644 --- a/src/v2/http/mindeeApiV2.ts +++ b/src/v2/http/mindeeApiV2.ts @@ -1,6 +1,6 @@ import { ApiSettingsV2 } from "./apiSettingsV2.js"; import { Dispatcher } from "undici"; -import { InferenceParameters } from "@/v2/client.js"; +import { InferenceParameters } from "@/v2/client/index.js"; import { ErrorResponse, InferenceResponse, JobResponse } from "@/v2/parsing/index.js"; import { sendRequestAndReadResponse, BaseHttpResponse } from "@/http/apiCore.js"; import { InputSource, LocalInputSource, UrlInput } from "@/input/index.js"; diff --git a/src/v2/index.ts b/src/v2/index.ts index d36b8748..b9f968f6 100644 --- a/src/v2/index.ts +++ b/src/v2/index.ts @@ -10,6 +10,5 @@ export { RagMetadata, ErrorResponse, } from "./parsing/index.js"; -export type { 
InferenceParameters } from "./client.js"; -export { DataSchema } from "./client/index.js"; +export { InferenceParameters, DataSchema } from "./client/index.js"; export type { PollingOptions } from "./client/index.js"; diff --git a/tests/v2/client.integration.ts b/tests/v2/client.integration.ts index a58515f0..1dcb4e44 100644 --- a/tests/v2/client.integration.ts +++ b/tests/v2/client.integration.ts @@ -71,7 +71,7 @@ describe("MindeeV2 – Client Integration Tests", () => { it("Empty, multi-page PDF – PathInput - enqueueAndGetInference must succeed", async () => { const source = new PathInput({ inputPath: emptyPdfPath }); - const params: InferenceParameters = { + const params = { modelId, rag: false, rawText: false, @@ -80,7 +80,6 @@ describe("MindeeV2 – Client Integration Tests", () => { webhookIds: [], alias: "ts_integration_empty_multiple" }; - const response = await client.enqueueAndGetInference(source, params); expect(response).to.exist; @@ -98,7 +97,7 @@ describe("MindeeV2 – Client Integration Tests", () => { it("Filled, single-page image – PathInput - enqueueAndGetInference must succeed", async () => { const source = new PathInput({ inputPath: sampleImagePath }); - const params: InferenceParameters = { + const params = { modelId, rag: false, rawText: true, @@ -137,7 +136,7 @@ describe("MindeeV2 – Client Integration Tests", () => { it("Filled, single-page image – Base64Input - enqueueAndGetInference must succeed", async () => { const data = fs.readFileSync(sampleBase64Path, "utf8"); const source = new Base64Input({ inputString: data, filename: "receipt.jpg" }); - const params: InferenceParameters = { + const params = new InferenceParameters({ modelId, rag: false, rawText: false, @@ -145,7 +144,7 @@ describe("MindeeV2 – Client Integration Tests", () => { confidence: false, webhookIds: [], alias: "ts_integration_base64_filled_single" - }; + }); const response = await client.enqueueAndGetInference(source, params); @@ -166,7 +165,7 @@ describe("MindeeV2 – Client 
Integration Tests", () => { it("Invalid model ID – enqueue must raise 422", async () => { const source = new PathInput({ inputPath: emptyPdfPath }); - const badParams: InferenceParameters = { modelId: "00000000-0000-0000-0000-000000000000" }; + const badParams = { modelId: "00000000-0000-0000-0000-000000000000" }; try { await client.enqueueInference(source, badParams); @@ -188,7 +187,7 @@ describe("MindeeV2 – Client Integration Tests", () => { it("HTTPS URL – enqueue & get inference must succeed", async () => { const url = process.env.MINDEE_V2_SE_TESTS_BLANK_PDF_URL ?? "error-no-url-found"; const source = new UrlInput({ url }); - const params: InferenceParameters = { + const params = new InferenceParameters({ modelId, rag: false, rawText: false, @@ -196,8 +195,7 @@ describe("MindeeV2 – Client Integration Tests", () => { confidence: false, webhookIds: [], alias: "ts_integration_url_source" - }; - + }); const response: InferenceResponse = await client.enqueueAndGetInference(source, params); expect(response).to.exist; @@ -206,7 +204,7 @@ describe("MindeeV2 – Client Integration Tests", () => { it("Data Schema Override - Overrides the data schema successfully", async () => { const source = new PathInput({ inputPath: emptyPdfPath }); - const params: InferenceParameters = { + const params = new InferenceParameters({ modelId, rag: false, rawText: false, @@ -215,7 +213,7 @@ describe("MindeeV2 – Client Integration Tests", () => { webhookIds: [], dataSchema: dataSchemaReplace, alias: "ts_integration_data_schema_replace" - }; + }); const response = await client.enqueueAndGetInference(source, params); expect(response).to.exist; diff --git a/tests/v2/client/inferenceParameter.spec.ts b/tests/v2/client/inferenceParameter.spec.ts new file mode 100644 index 00000000..a3fab355 --- /dev/null +++ b/tests/v2/client/inferenceParameter.spec.ts @@ -0,0 +1,65 @@ +import { StringDict } from "@/parsing/index.js"; +import path from "path"; +import { V2_RESOURCE_PATH } from "../../index.js"; 
+import { InferenceParameters } from "@/index.js"; +import { expect } from "chai"; +import { DataSchema } from "@/index.js"; +import { promises as fs } from "fs"; + +let expectedDataSchemaDict: StringDict; +let expectedDataSchemaString: string; +let expectedDataSchemaObject: DataSchema; + +describe("MindeeV2 - Inference Parameter", () => { + const modelIdValue = "test-model-id"; + + describe("Polling Options", () => { + it("should provide sensible defaults", () => { + + const paramsInstance = new InferenceParameters({ + modelId: modelIdValue, + }); + expect(paramsInstance.modelId).to.equal(modelIdValue); + expect(paramsInstance.getValidatedPollingOptions()).to.deep.equal({ + delaySec: 1.5, + initialDelaySec: 2, + maxRetries: 80 + }); + }); + }); + + describe("Data Schema", () => { + before(async () => { + const fileContents = await fs.readFile(path.join(V2_RESOURCE_PATH, "inference/data_schema_replace_param.json")); + expectedDataSchemaDict = JSON.parse(fileContents.toString()); + expectedDataSchemaString = JSON.stringify(expectedDataSchemaDict); + expectedDataSchemaObject = new DataSchema(expectedDataSchemaDict); + }); + + it("shouldn't replace when unset", () => { + const params = new InferenceParameters({ + modelId: modelIdValue, + }); + expect(params.dataSchema).to.be.undefined; + }); + + it("should equate no matter the type", () => { + const paramsDict = new InferenceParameters({ + modelId: modelIdValue, + dataSchema: expectedDataSchemaDict, + }); + const paramsString = new InferenceParameters({ + modelId: modelIdValue, + dataSchema: expectedDataSchemaString, + }); + const paramsObject = new InferenceParameters({ + modelId: modelIdValue, + dataSchema: expectedDataSchemaObject, + }); + + expect(JSON.stringify(paramsDict.dataSchema)).to.eq(expectedDataSchemaString); + expect(paramsObject.dataSchema?.toString()).to.eq(expectedDataSchemaString); + expect(paramsString.dataSchema?.toString()).to.eq(expectedDataSchemaString); + }); + }); +}); diff --git 
a/tests/v2/input/inferenceParameter.spec.ts b/tests/v2/input/inferenceParameter.spec.ts deleted file mode 100644 index eb5b3e80..00000000 --- a/tests/v2/input/inferenceParameter.spec.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { StringDict } from "@/parsing/index.js"; -import path from "path"; -import { V2_RESOURCE_PATH } from "../../index.js"; -import { InferenceParameters } from "@/index.js"; -import { expect } from "chai"; -import { DataSchema } from "@/index.js"; -import { promises as fs } from "fs"; - -let expectedDataSchemaDict: StringDict; -let expectedDataSchemaString: string; -let expectedDataSchemaObject: DataSchema; - -describe("MindeeV2 - Inference Parameter", () => { - before(async () => { - const fileContents = await fs.readFile(path.join(V2_RESOURCE_PATH, "inference/data_schema_replace_param.json")); - expectedDataSchemaDict = JSON.parse(fileContents.toString()); - expectedDataSchemaString = JSON.stringify(expectedDataSchemaDict); - expectedDataSchemaObject = new DataSchema(expectedDataSchemaDict); - }); - - describe("dataSchema", () => { - it("shouldn't replace when unset", async () => { - const params: InferenceParameters = { - modelId: "test-model-id", - }; - - expect(params.dataSchema).to.be.undefined; - }); - - it("should equate no matter the type", async () => { - const paramsDict: InferenceParameters = { - modelId: "test-model-id", - dataSchema: expectedDataSchemaDict, - }; - const paramsString: InferenceParameters = { - modelId: "test-model-id", - dataSchema: expectedDataSchemaString, - }; - const paramsObject: InferenceParameters = { - modelId: "test-model-id", - dataSchema: expectedDataSchemaObject, - }; - - expect(JSON.stringify(paramsDict.dataSchema)).to.eq(expectedDataSchemaString); - expect(paramsObject.dataSchema?.toString()).to.eq(expectedDataSchemaString); - expect(paramsString.dataSchema?.toString()).to.eq(expectedDataSchemaString); - }); - }); -}); diff --git a/tests/v2/input/localResponse.spec.ts 
b/tests/v2/parsing/localResponse.spec.ts similarity index 100% rename from tests/v2/input/localResponse.spec.ts rename to tests/v2/parsing/localResponse.spec.ts