jws-frontend/src/utils/ocr.ts

45 lines
1.2 KiB
TypeScript
Raw Normal View History

2024-10-02 17:09:26 +07:00
import { createWorker, ImageLike, RecognizeResult, Worker } from 'tesseract.js';
2024-10-02 13:55:52 +07:00
import { parseMRZ } from './mrz';
2024-10-02 17:09:26 +07:00
let worker: Worker;
2024-10-02 13:55:52 +07:00
2024-10-02 17:09:26 +07:00
/**
 * Pending (or settled) creation of the shared worker. Keeping the promise,
 * not just the resolved worker, lets calls that arrive while the worker is
 * still being created wait on the same initialisation instead of each
 * starting a worker of their own.
 */
let workerInit: Promise<Worker> | undefined;

/**
 * Returns the module-wide OCR worker, creating it on first use.
 *
 * The worker is created with the `ocrb` and `eng` languages and loads its
 * language data from `/ocr-data`.
 * NOTE(review): the second `createWorker` argument (`1`) is the OCR engine
 * mode; presumably LSTM-only per tesseract.js's OEM enum — confirm.
 *
 * If creation fails, the cached promise is cleared so that a later call can
 * try again rather than receiving the same rejection forever.
 */
function getWorker(): Promise<Worker> {
  if (worker) {
    return Promise.resolve(worker);
  }
  if (!workerInit) {
    workerInit = createWorker(['ocrb', 'eng'], 1, {
      langPath: '/ocr-data',
    }).then(
      (created) => {
        worker = created;
        return created;
      },
      (error: unknown) => {
        workerInit = undefined;
        throw error;
      },
    );
  }
  return workerInit;
}

/**
 * Runs OCR on `image` using the shared worker.
 *
 * @param image - The image to recognise.
 * @returns The recognition result produced by the worker.
 */
export function runOcr(image: ImageLike): Promise<RecognizeResult>;
/**
 * Runs OCR on `image` and passes the recognition result to `callback`.
 *
 * @param image - The image to recognise.
 * @param callback - Invoked once with the recognition result.
 * @returns Whatever `callback` returns.
 */
export function runOcr<
  CallbackFunction extends (result: RecognizeResult) => void,
>(
  image: ImageLike,
  callback: CallbackFunction,
): Promise<ReturnType<CallbackFunction>>;
export async function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback?: T,
) {
  // Concurrent first calls all await the same initialisation, so only one
  // worker is ever created.
  const ocrWorker = await getWorker();
  const result = await ocrWorker.recognize(image);
  if (callback) {
    return callback(result);
  }
  return result;
}
/**
 * Picks machine-readable-zone candidate lines out of an OCR result and
 * forwards them to `parseMRZ` with a format label.
 *
 * The recognised text is split on whitespace; tokens containing a run of at
 * least 30 characters from `A-Z`, `0-9` or `<` are kept as zone lines. The
 * label is chosen from the number of lines and the length of the first one:
 *
 * - 2 lines, first line >= 44 characters: `'TD3'`
 * - 2 lines, first line >= 36 characters: `'TD2'`
 * - 3 lines, first line >= 30 characters: `'TD1'`
 *
 * @param result - Recognition result whose `data.text` is scanned.
 * @returns The value returned by `parseMRZ`, or
 *   `{ mrz: { type: 'UNKNOWN', zone }, result: null }` when the candidate
 *   lines match none of the layouts above.
 */
export function parseResultMRZ(result: RecognizeResult) {
  const zone = result.data.text
    .split(/[\s\r\n]+/)
    .filter((token) => /[A-Z0-9<]{30,}/.test(token));

  const lineCount = zone.length;
  const firstLineLength = zone[0]?.length ?? 0;

  if (lineCount === 2) {
    // Check the longer layout first: a 44-character line also satisfies >= 36.
    if (firstLineLength >= 44) {
      return parseMRZ({ type: 'TD3', zone });
    }
    if (firstLineLength >= 36) {
      return parseMRZ({ type: 'TD2', zone });
    }
  }

  if (lineCount === 3 && firstLineLength >= 30) {
    return parseMRZ({ type: 'TD1', zone });
  }

  return { mrz: { type: 'UNKNOWN', zone }, result: null };
}