44 lines
1.2 KiB
TypeScript
44 lines
1.2 KiB
TypeScript
import { createWorker, ImageLike, RecognizeResult, Worker } from 'tesseract.js';
|
|
|
|
import { parseMRZ } from './mrz';
|
|
|
|
/**
 * In-flight or settled creation of the shared Tesseract worker.
 *
 * The promise (not the resolved worker) is cached so that calls arriving
 * while the worker is still being created reuse the same creation instead
 * of each spawning their own worker.
 */
let workerPromise: Promise<Worker> | undefined;

/**
 * Returns the shared OCR worker, creating it on first use.
 *
 * The worker is created with the `ocrb` and `eng` languages, loaded from
 * `/ocr-data`. The second argument (`1`) is passed through to
 * `createWorker` unchanged; NOTE(review): in tesseract.js this position is
 * the OCR engine mode, where 1 is expected to mean LSTM-only — confirm
 * against the installed version.
 *
 * If creation fails, the cached promise is cleared so that the next call
 * attempts creation again rather than replaying the same rejection forever.
 */
function getWorker(): Promise<Worker> {
  if (!workerPromise) {
    workerPromise = createWorker(['ocrb', 'eng'], 1, {
      langPath: '/ocr-data',
    }).catch((error: unknown) => {
      // Allow a later call to retry initialisation after a failure.
      workerPromise = undefined;
      throw error;
    });
  }
  return workerPromise;
}

/**
 * Runs OCR on an image using a lazily created, shared Tesseract worker.
 *
 * @param image - The image to recognise.
 * @returns The raw recognition result.
 */
export function runOcr(image: ImageLike): Promise<RecognizeResult>;
/**
 * Runs OCR on an image and passes the recognition result to `callback`.
 *
 * @param image - The image to recognise.
 * @param callback - Invoked with the recognition result once it is available.
 * @returns Whatever `callback` returns.
 */
export function runOcr<
  CallbackFunction extends (result: RecognizeResult) => void,
>(
  image: ImageLike,
  callback: CallbackFunction,
): Promise<ReturnType<CallbackFunction>>;
export async function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback?: T,
) {
  const ocrWorker = await getWorker();
  const recognized = await ocrWorker.recognize(image);

  if (callback) {
    return callback(recognized);
  }
  return recognized;
}
|
|
|
|
/**
 * Extracts machine-readable-zone candidate lines from an OCR result and
 * hands them to `parseMRZ` with a document type chosen from their shape.
 *
 * The recognised text is split on whitespace; a token is kept as a zone line
 * when it contains a run of at least 30 characters from `A-Z`, `0-9` or `<`.
 * The type is then chosen from the number of kept lines and the length of
 * the first one:
 *
 * - 2 lines, first line at least 44 characters: `TD3`
 * - 2 lines, first line at least 36 characters: `TD2`
 * - 3 lines, first line at least 30 characters: `TD1`
 *
 * @param result - OCR output whose `data.text` is scanned for zone lines.
 * @returns The value returned by `parseMRZ` for a recognised layout;
 *   otherwise an object with `mrz.type` set to `'UNKNOWN'`, the collected
 *   lines in `mrz.zone`, and `result` set to `null`.
 */
export function parseResultMRZ(result: RecognizeResult) {
  const mrzRun = /[A-Z0-9<]{30,}/;
  const zone: string[] = [];
  for (const token of result.data.text.split(/[\s\r\n]+/)) {
    if (mrzRun.test(token)) {
      zone.push(token);
    }
  }

  if (zone.length === 2) {
    // Two-line layouts are told apart by the length of the first line only.
    const firstLineLength = zone[0].length;
    if (firstLineLength >= 44) {
      return parseMRZ({ type: 'TD3', zone });
    }
    if (firstLineLength >= 36) {
      return parseMRZ({ type: 'TD2', zone });
    }
  } else if (zone.length === 3 && zone[0].length >= 30) {
    return parseMRZ({ type: 'TD1', zone });
  }

  // Anything else is reported as-is without attempting to parse it.
  return { mrz: { type: 'UNKNOWN', zone }, result: null };
}
|