Merged
3 changes: 2 additions & 1 deletion src/index.ts
@@ -23,6 +23,7 @@ export {
JobResponse,
RawText,
RagMetadata,
InferenceParameters,
DataSchema,
} from "./v2/index.js";
export type { InferenceParameters, PollingOptions } from "./v2/index.js";
export type { PollingOptions } from "./v2/index.js";
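A brief consumer-side sketch of the updated export surface: `InferenceParameters` is now re-exported as a value (it becomes a class in this change), while `PollingOptions` stays a type-only export. The package name "mindee" is an assumption for illustration.

// Hypothetical consumer import, assuming the package root is published as "mindee".
import { InferenceParameters } from "mindee";
import type { PollingOptions } from "mindee";

// InferenceParameters is now a class, so it can be constructed directly.
const polling: PollingOptions = { initialDelaySec: 2, delaySec: 1.5, maxRetries: 80 };
const params = new InferenceParameters({
  modelId: "YOUR_MODEL_ID",
  rag: true,
  pollingOptions: polling,
});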
102 changes: 27 additions & 75 deletions src/v2/client.ts
@@ -3,62 +3,10 @@ import { Dispatcher } from "undici";
import { InputSource } from "@/input/index.js";
import { errorHandler } from "@/errors/handler.js";
import { LOG_LEVELS, logger } from "@/logger.js";
import { StringDict } from "@/parsing/stringDict.js";
import { ErrorResponse, InferenceResponse, JobResponse } from "./parsing/index.js";
import { MindeeApiV2 } from "./http/mindeeApiV2.js";
import { MindeeHttpErrorV2 } from "./http/errors.js";
import { PollingOptions, DataSchema } from "./client/index.js";
import { setAsyncParams } from "./client/pollingOptions.js";

/**
* Parameters accepted by the asynchronous **inference** v2 endpoint.
*
* All fields are optional except `modelId`.
*
* @category ClientV2
* @example
* const params = {
* modelId: "YOUR_MODEL_ID",
* rag: true,
* alias: "YOUR_ALIAS",
* webhookIds: ["YOUR_WEBHOOK_ID_1", "YOUR_WEBHOOK_ID_2"],
* pollingOptions: {
* initialDelaySec: 2,
* delaySec: 1.5,
* }
* };
*/
export interface InferenceParameters {
/** Model ID to use for the inference. **Required** */
modelId: string;
/** Use Retrieval-Augmented Generation during inference. */
rag?: boolean;
/** Extract the entire text from the document as strings, and fill the `rawText` attribute. */
rawText?: boolean;
/** Calculate bounding box polygons for values, and fill the `locations` attribute of fields. */
polygon?: boolean;
/** Calculate confidence scores for values, and fill the `confidence` attribute of fields.
* Useful for automation.*/
confidence?: boolean;
/** Use an alias to link the file to your own DB. If empty, no alias will be used. */
alias?: string;
/** Additional text context used by the model during inference.
* *Not recommended*, for specific use only. */
textContext?: string;
/** Webhook IDs to call after all processing is finished.
* If empty, no webhooks will be used. */
webhookIds?: string[];
/** Client-side polling configuration (see {@link PollingOptions}). */
pollingOptions?: PollingOptions;
/** By default, the file is closed once the upload is finished.
* Set to `false` to keep it open. */
closeFile?: boolean;
/**
* Dynamic changes to the data schema of the model for this inference.
* Not recommended, for specific use only.
*/
dataSchema?: DataSchema|StringDict|string;
}
import { InferenceParameters } from "./client/index.js";

/**
* Options for the V2 Mindee Client.
@@ -111,18 +59,6 @@ export class Client {
logger.debug("Client V2 Initialized");
}

/**
* Checks the Data Schema.
* @param params Input Inference parameters.
*/
validateDataSchema(params: InferenceParameters): void {
if (params.dataSchema !== undefined && params.dataSchema !== null){
if (!(params.dataSchema instanceof DataSchema)){
params.dataSchema = new DataSchema(params.dataSchema);
}
}
}

/**
* Send the document to an asynchronous endpoint and return its ID in the queue.
* @param inputSource file or URL to parse.
@@ -132,15 +68,18 @@
*/
async enqueueInference(
inputSource: InputSource,
params: InferenceParameters
params: InferenceParameters | ConstructorParameters<typeof InferenceParameters>[0]
): Promise<JobResponse> {
if (inputSource === undefined) {
throw new Error("The 'enqueue' function requires an input document.");
}
this.validateDataSchema(params);
const inferenceParams = params instanceof InferenceParameters
? params
: new InferenceParameters(params);

await inputSource.init();

return await this.mindeeApi.reqPostInferenceEnqueue(inputSource, params);
return await this.mindeeApi.reqPostInferenceEnqueue(inputSource, inferenceParams);
}

/**
@@ -183,9 +122,14 @@
*/
async enqueueAndGetInference(
inputSource: InputSource,
params: InferenceParameters
params: InferenceParameters | ConstructorParameters<typeof InferenceParameters>[0]
): Promise<InferenceResponse> {
const validatedAsyncParams = setAsyncParams(params.pollingOptions);
const inferenceParams = params instanceof InferenceParameters
? params
: new InferenceParameters(params);

const pollingOptions = inferenceParams.getValidatedPollingOptions();

const enqueueResponse: JobResponse = await this.enqueueInference(inputSource, params);
if (enqueueResponse.job.id === undefined || enqueueResponse.job.id.length === 0) {
logger.error(`Failed enqueueing:\n${enqueueResponse.getRawHttp()}`);
@@ -196,10 +140,14 @@
`Successfully enqueued document with job id: ${queueId}.`
);

await setTimeout(validatedAsyncParams.initialDelaySec * 1000, undefined, validatedAsyncParams.initialTimerOptions);
await setTimeout(
pollingOptions.initialDelaySec * 1000,
undefined,
pollingOptions.initialTimerOptions
);
let retryCounter: number = 1;
let pollResults: JobResponse = await this.getJob(queueId);
while (retryCounter < validatedAsyncParams.maxRetries) {
while (retryCounter < pollingOptions.maxRetries) {
if (pollResults.job.status === "Failed") {
break;
}
@@ -208,10 +156,14 @@
}
logger.debug(
`Polling server for parsing result with queueId: ${queueId}.
Attempt no. ${retryCounter} of ${validatedAsyncParams.maxRetries}.
Attempt no. ${retryCounter} of ${pollingOptions.maxRetries}.
Job status: ${pollResults.job.status}.`
);
await setTimeout(validatedAsyncParams.delaySec * 1000, undefined, validatedAsyncParams.recurringTimerOptions);
await setTimeout(
pollingOptions.delaySec * 1000,
undefined,
pollingOptions.recurringTimerOptions
);
pollResults = await this.getJob(queueId);
retryCounter++;
}
@@ -221,7 +173,7 @@ Job status: ${pollResults.job.status}.`
}
throw Error(
"Asynchronous parsing request timed out after " +
validatedAsyncParams.delaySec * retryCounter +
pollingOptions.delaySec * retryCounter +
" seconds"
);
}
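A minimal usage sketch of the widened parameter type, assuming an existing `client` (an instance of this `Client` class) and an `InputSource` named `source` created elsewhere; both the plain-object form and an `InferenceParameters` instance are accepted and normalized internally.

// Sketch only: `client` and `source` are assumed to already exist.
// Plain-object form: wrapped internally with `new InferenceParameters(...)`.
const plainResponse = await client.enqueueAndGetInference(source, {
  modelId: "YOUR_MODEL_ID",
  pollingOptions: { initialDelaySec: 2, delaySec: 1.5, maxRetries: 80 },
});

// Class form: passed through unchanged.
const classParams = new InferenceParameters({ modelId: "YOUR_MODEL_ID", rag: true });
const classResponse = await client.enqueueAndGetInference(source, classParams);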
8 changes: 0 additions & 8 deletions src/v2/client/dataSchema.ts
@@ -6,43 +6,35 @@ export class DataSchemaField {
* Display name for the field, also impacts inference results.
*/
public title: string;

/**
* Name of the field in the data schema.
*/
public name: string;

/**
* Whether this field can contain multiple values.
*/
public isArray: boolean;

/**
* Data type of the field.
*/
public type: string;

/**
* Allowed values when type is `classification`. Leave empty for other types.
*/
public classificationValues?: Array<string>;

/**
* Whether to remove duplicate values in the array.
* Only applicable if `isArray` is `true`.
*/
public uniqueValues?: boolean;

/**
* Detailed description of what this field represents.
*/
public description?: string;

/**
* Optional extraction guidelines.
*/
public guidelines?: string;

/**
* Subfields when type is `nested_object`. Leave empty for other types.
*/
1 change: 1 addition & 0 deletions src/v2/client/index.ts
@@ -1,2 +1,3 @@
export { DataSchema } from "./dataSchema.js";
export type { PollingOptions, ValidatedPollingOptions } from "./pollingOptions.js";
export { InferenceParameters } from "./inferenceParameters.js";
144 changes: 144 additions & 0 deletions src/v2/client/inferenceParameters.ts
@@ -0,0 +1,144 @@
import { StringDict } from "@/parsing/stringDict.js";
import { PollingOptions, ValidatedPollingOptions } from "./pollingOptions.js";
import { DataSchema } from "./dataSchema.js";

/**
* Parameters accepted by the asynchronous **inference** v2 endpoint.
*
* All fields are optional except `modelId`.
*
* @category ClientV2
* @example
* const params = {
* modelId: "YOUR_MODEL_ID",
* rag: true,
* alias: "YOUR_ALIAS",
* webhookIds: ["YOUR_WEBHOOK_ID_1", "YOUR_WEBHOOK_ID_2"],
* pollingOptions: {
* initialDelaySec: 2,
* delaySec: 1.5,
* }
* };
*/
export class InferenceParameters {
/**
* Model ID to use for the inference. **Required.**
*/
modelId: string;
/**
* Use Retrieval-Augmented Generation during inference.
*/
rag?: boolean;
/**
* Extract the entire text from the document as strings, and fill the `rawText` attribute.
*/
rawText?: boolean;
/**
* Calculate bounding box polygons for values, and fill the `locations` attribute of fields.
*/
polygon?: boolean;
/**
* Calculate confidence scores for values, and fill the `confidence` attribute of fields.
* Useful for automation.
*/
confidence?: boolean;
/**
* Use an alias to link the file to your own DB.
* If empty, no alias will be used.
*/
alias?: string;
/**
* Additional text context used by the model during inference.
* *Not recommended*, for specific use only.
*/
textContext?: string;
/**
* Webhook IDs to call after all processing is finished.
* If empty, no webhooks will be used.
*/
webhookIds?: string[];
/**
* Client-side polling configuration (see {@link PollingOptions}).
*/
pollingOptions?: PollingOptions;
/**
* By default, the file is closed once the upload is finished.
* Set to `false` to keep it open.
*/
closeFile?: boolean;
/**
* Dynamic changes to the data schema of the model for this inference.
* Not recommended, for specific use only.
*/
dataSchema?: DataSchema | StringDict | string;

constructor(params: {
modelId: string;
rag?: boolean;
rawText?: boolean;
polygon?: boolean;
confidence?: boolean;
alias?: string;
textContext?: string;
webhookIds?: string[];
pollingOptions?: PollingOptions;
closeFile?: boolean;
dataSchema?: DataSchema | StringDict | string;
}) {
this.modelId = params.modelId;
this.rag = params.rag;
this.rawText = params.rawText;
this.polygon = params.polygon;
this.confidence = params.confidence;
this.alias = params.alias;
this.textContext = params.textContext;
this.webhookIds = params.webhookIds;
this.closeFile = params.closeFile;
this.pollingOptions = params.pollingOptions;

if (params.dataSchema !== undefined && params.dataSchema !== null) {
if (!(params.dataSchema instanceof DataSchema)){
this.dataSchema = new DataSchema(params.dataSchema);
} else {
this.dataSchema = params.dataSchema;
}
}
}

/**
* Validates the client-side polling options, applying the default values when none are provided.
* @returns A valid `ValidatedPollingOptions`.
*/
getValidatedPollingOptions(): ValidatedPollingOptions {
const minDelaySec = 1;
const minInitialDelay = 1;
const minRetries = 2;
let newAsyncParams: PollingOptions;
if (this.pollingOptions === undefined) {
newAsyncParams = {
delaySec: 1.5,
initialDelaySec: 2,
maxRetries: 80
};
} else {
newAsyncParams = { ...this.pollingOptions };
if (
!newAsyncParams.delaySec ||
!newAsyncParams.initialDelaySec ||
!newAsyncParams.maxRetries
) {
throw Error("Invalid polling options.");
}
if (newAsyncParams.delaySec < minDelaySec) {
throw Error(`Cannot set auto-parsing delay to less than ${minDelaySec} second(s).`);
}
if (newAsyncParams.initialDelaySec < minInitialDelay) {
throw Error(`Cannot set initial parsing delay to less than ${minInitialDelay} second(s).`);
}
if (newAsyncParams.maxRetries < minRetries) {
throw Error(`Cannot set retry to less than ${minRetries}.`);
}
}
return newAsyncParams as ValidatedPollingOptions;
}
}
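A short sketch restating what `getValidatedPollingOptions` above enforces: the defaults apply when no `pollingOptions` are supplied, and out-of-range values are rejected. The values shown only mirror the implementation.

// No polling options: the defaults are returned.
const withDefaults = new InferenceParameters({ modelId: "YOUR_MODEL_ID" });
withDefaults.getValidatedPollingOptions();
// -> { delaySec: 1.5, initialDelaySec: 2, maxRetries: 80 }

// Out-of-range values throw.
const tooFast = new InferenceParameters({
  modelId: "YOUR_MODEL_ID",
  pollingOptions: { delaySec: 0.5, initialDelaySec: 2, maxRetries: 80 },
});
// tooFast.getValidatedPollingOptions();
// -> Error: "Cannot set auto-parsing delay to less than 1 second(s)."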