jws-frontend/src/utils/ocr.ts
2024-10-02 13:55:52 +07:00

38 lines
1.1 KiB
TypeScript

import { createWorker, ImageLike, RecognizeResult } from 'tesseract.js';
import { parseMRZ } from './mrz';
/**
 * In-flight (or settled) creation of the single Tesseract worker shared by
 * every `runOcr` call. The promise itself is cached, rather than the resolved
 * worker, so that calls arriving while the worker is still being created
 * reuse that creation instead of each starting their own.
 */
let workerPromise: Promise<Tesseract.Worker> | undefined;

/**
 * Returns the shared worker, starting its creation on first use.
 *
 * The worker is created for the `ocrb`, `eng` and `khm` languages with
 * `langPath` set to `/ocr-data`. The second `createWorker` argument (`1`) is
 * presumably the OCR engine mode (LSTM only) — confirm against the
 * tesseract.js version in use.
 */
function getWorker(): Promise<Tesseract.Worker> {
  if (!workerPromise) {
    workerPromise = createWorker(['ocrb', 'eng', 'khm'], 1, {
      langPath: '/ocr-data',
    }).catch((error: unknown) => {
      // Forget the failed attempt so a later call can retry instead of
      // receiving the same cached rejection forever.
      workerPromise = undefined;
      throw error;
    });
  }
  return workerPromise;
}

/**
 * Ensures the shared OCR worker exists and, when a callback is supplied, runs
 * recognition on `image` and hands the result to that callback.
 *
 * When called without `callback`, the function only waits for the worker to
 * be created; `image` is not recognised in that case.
 *
 * @param image - The image to recognise.
 * @param callback - Receives the recognition result; its return value becomes
 *   the resolved value of the returned promise.
 * @returns A promise resolving to the callback's return value, or to
 *   `undefined` when no callback is given.
 * @throws Rejects if worker creation, recognition, or the callback fails.
 */
export function runOcr(image: ImageLike): Promise<void>;
export function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback: T,
): Promise<ReturnType<T>>;
export async function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback?: T,
) {
  const ocrWorker = await getWorker();
  if (callback) return callback(await ocrWorker.recognize(image));
}
/**
 * Matches a whitespace-delimited OCR token made up entirely of MRZ characters
 * (`A`-`Z`, `0`-`9`, `<`) and at least 30 characters long. Anchored so that a
 * token with stray characters around a valid run is not mistaken for an MRZ
 * line.
 */
const MRZ_LINE_PATTERN = /^[A-Z0-9<]{30,}$/;

/**
 * Extracts the machine-readable zone from OCR output and parses it.
 *
 * The recognised text is split on whitespace and only tokens that look like
 * MRZ lines are kept. The layout is then chosen from the number of lines and
 * their length: 3 x 30 is `TD1`, 2 x 36 is `TD2`, 2 x 44 is `TD3`. Every
 * line must have the layout's length, so truncated or over-long lines are
 * rejected rather than handed to the parser.
 *
 * @param result - The recognition result whose `data.text` is scanned.
 * @returns Whatever `parseMRZ` returns for the detected zone, or `null` when
 *   the text does not contain a zone in one of the three layouts.
 */
export function parseResultMRZ(result: RecognizeResult) {
  const zone = result.data.text
    .split(/\s+/)
    .filter((token) => MRZ_LINE_PATTERN.test(token));

  // True when the zone has exactly `lineCount` lines, each `lineLength` long.
  const hasLayout = (lineCount: number, lineLength: number) =>
    zone.length === lineCount &&
    zone.every((line) => line.length === lineLength);

  if (hasLayout(3, 30)) return parseMRZ({ type: 'TD1', zone });
  if (hasLayout(2, 36)) return parseMRZ({ type: 'TD2', zone });
  if (hasLayout(2, 44)) return parseMRZ({ type: 'TD3', zone });
  return null;
}