jws-frontend/src/utils/ocr.ts
2024-10-03 08:55:19 +07:00

44 lines
1.2 KiB
TypeScript

import { createWorker, ImageLike, RecognizeResult, Worker } from 'tesseract.js';
import { parseMRZ } from './mrz';
/**
 * Pending or resolved creation of the single Tesseract worker shared by every
 * `runOcr` call. The promise itself is cached (not the resolved worker) so
 * that calls arriving while the worker is still being created reuse the same
 * creation instead of each starting their own.
 */
let workerPromise: Promise<Worker> | undefined;

/**
 * Returns the shared worker, creating it on first use with the `ocrb` and
 * `eng` languages, OEM `1`, and `/ocr-data` as the language data path.
 *
 * If creation fails the cache is cleared, so the next call retries instead of
 * replaying the same rejection forever.
 */
function getWorker(): Promise<Worker> {
  if (!workerPromise) {
    workerPromise = createWorker(['ocrb', 'eng'], 1, {
      langPath: '/ocr-data',
    }).catch((error: unknown) => {
      workerPromise = undefined;
      throw error;
    });
  }
  return workerPromise;
}

/**
 * Runs OCR on `image` using the shared, lazily created worker.
 *
 * @param image - Image to recognize.
 * @returns The raw recognition result.
 */
export function runOcr(image: ImageLike): Promise<RecognizeResult>;
/**
 * Runs OCR on `image` and passes the recognition result through `callback`.
 *
 * @param image - Image to recognize.
 * @param callback - Invoked with the recognition result.
 * @returns Whatever `callback` returns.
 */
export function runOcr<
  CallbackFunction extends (result: RecognizeResult) => void,
>(
  image: ImageLike,
  callback: CallbackFunction,
): Promise<ReturnType<CallbackFunction>>;
export async function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback?: T,
) {
  const worker = await getWorker();
  const result = await worker.recognize(image);
  return callback ? callback(result) : result;
}
/**
 * Extracts machine-readable-zone lines from an OCR result and hands them to
 * `parseMRZ` with a format guessed from the line count and first line length.
 *
 * The recognized text is split on whitespace; only tokens containing a run of
 * at least 30 characters from `A-Z`, `0-9` or `<` are kept as zone lines.
 *
 * - 2 lines, first line >= 44 chars: parsed as `TD3`
 * - 2 lines, first line >= 36 chars: parsed as `TD2`
 * - 3 lines, first line >= 30 chars: parsed as `TD1`
 *
 * @param result - Recognition result whose `data.text` is scanned.
 * @returns The `parseMRZ` output for a recognized layout, otherwise
 *   `{ mrz: { type: 'UNKNOWN', zone }, result: null }` carrying the candidate
 *   lines that were found.
 */
export function parseResultMRZ(result: RecognizeResult) {
  const zone: string[] = [];
  for (const token of result.data.text.split(/[\s\r\n]+/)) {
    if (/[A-Z0-9<]{30,}/.test(token)) {
      zone.push(token);
    }
  }

  // Defaulting to '' keeps the length checks below safe when nothing matched.
  const [firstLine = ''] = zone;
  const lineCount = zone.length;
  const firstLength = firstLine.length;

  if (lineCount === 2) {
    if (firstLength >= 44) {
      return parseMRZ({ type: 'TD3', zone });
    }
    if (firstLength >= 36) {
      return parseMRZ({ type: 'TD2', zone });
    }
  }

  if (lineCount === 3 && firstLength >= 30) {
    return parseMRZ({ type: 'TD1', zone });
  }

  return { mrz: { type: 'UNKNOWN', zone }, result: null };
}