feat: add ocr function
This commit is contained in:
parent
adc3f8e068
commit
263d575955
4 changed files with 244 additions and 8 deletions
135
src/utils/mrz.ts
Normal file
135
src/utils/mrz.ts
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
/**
 * A machine-readable zone as handed to the parsers in this module.
 */
type MRZ = {
  /** Layout tag; `parseMRZ` uses it to choose which parser runs. */
  type:
    | 'TD1'
    | 'TD2'
    | 'TD3';
  /** The zone's text lines, in order (index 0 is the first line). */
  zone: Array<string>;
};
|
||||
|
||||
/**
 * Field patterns for a TD1 zone, one RegExp per line (three lines, each
 * consuming 30 characters). Every field is a named capture group, so the
 * `groups` of a match can be merged straight into the parse result.
 */
const MRZ_TD_1 = [
  // Line 1: document code, issuing state, document number and optional data.
  '(?<doc_type>[0-9A-Z<]{1})' +
    '(?<doc_subtype>[A-Z<]{1})' +
    '(?<country>[0-9A-Z<]{3})' +
    '(?<doc_number>[0-9A-Z<]{9})' +
    '(?<doc_number_check>[0-9A-Z<]{1})' +
    '(?<complement>[0-9A-Z<]{15})',

  // Line 2: dates with their check digits, sex, nationality, optional data.
  '(?<birth_date>[0-9A-Z<]{6})' +
    '(?<birth_date_check>[0-9A-Z<]{1})' +
    '(?<sex>[mfMF<]{1})' +
    '(?<expire_date>[0-9A-Z<]{6})' +
    '(?<expire_date_check>[0-9A-Z<]{1})' +
    '(?<nationality>[0-9A-Z<]{3})' +
    '(?<optional_data>[A-Z0-9<]{11})' +
    '(?<linecheck>[0-9A-Z<]{1})',

  // Line 3: the holder's name, padded with `<`.
  '(?<full_name>[A-Z<]{30})',
].map((source) => new RegExp(source));
|
||||
/**
 * Field patterns for a TD2 zone, one RegExp per line (two lines, each
 * consuming 36 characters). Every field is a named capture group, so the
 * `groups` of a match can be merged straight into the parse result.
 *
 * NOTE(review): the group names `doc_numbercheck`, `nacionality` and
 * `line_check` differ from the spellings used by the TD1/TD3 patterns
 * (`doc_number_check`, `nationality`, `linecheck`). They are left untouched
 * here because they become keys of the parse result, which callers may
 * already read — confirm before renaming.
 */
const MRZ_TD_2 = [
  // Line 1: document code, issuing state and the holder's name.
  new RegExp(
    [
      '(?<doc_type>[0-9A-Z<]{1})',
      '(?<doc_subtype>[A-Z<]{1})',
      '(?<country>[0-9A-Z<]{3})',
      '(?<full_name>[A-Z<]{31})',
    ].join(''),
  ),

  // Line 2: document number, nationality, dates, sex and optional data.
  new RegExp(
    [
      '(?<doc_number>[0-9A-Z<]{9})',
      '(?<doc_numbercheck>[0-9A-Z<]{1})',
      '(?<nacionality>[0-9A-Z<]{3})',
      '(?<birth_date>[0-9A-Z<]{6})',
      '(?<birth_date_check>[0-9A-Z<]{1})',
      // `<` (unspecified sex) is accepted, exactly as in the TD1 and TD3
      // patterns; without it the whole second line failed to match.
      '(?<sex>[mfMF<]{1})',
      '(?<expire_date>[0-9A-Z<]{6})',
      '(?<expire_date_check>[0-9A-Z<]{1})',
      '(?<optional_data>[A-Z0-9<]{7})',
      '(?<line_check>[0-9A-Z<]{1})',
    ].join(''),
  ),
];
|
||||
|
||||
/**
 * Field patterns for a TD3 zone, one RegExp per line (two lines, each
 * consuming 44 characters). Every field is a named capture group, so the
 * `groups` of a match can be merged straight into the parse result.
 */
const MRZ_TD_3 = [
  // Line 1: document code, issuing state and the holder's name.
  new RegExp(
    [
      '(?<doc_type>[A-Z0-9<]{1})',
      '(?<doc_subtype>[A-Z0-9<]{1})',
      // The filler `<` is allowed so that codes shorter than three letters
      // (for example `D<<`) match, as the TD1 and TD2 patterns already permit.
      '(?<country>[A-Z0-9<]{3})',
      '(?<full_name>[A-Z0-9<]{39})',
    ].join(''),
  ),

  // Line 2: document number, nationality, dates, sex and personal number.
  new RegExp(
    [
      '(?<doc_number>[0-9A-Z<]{9})',
      '(?<doc_number_check>[0-9A-Z<]{1})',
      '(?<nationality>[0-9A-Z<]{3})',
      '(?<birth_date>[0-9A-Z<]{6})',
      '(?<birth_date_check>[0-9A-Z<]{1})',
      '(?<sex>[mfMF<]{1})',
      '(?<expire_date>[0-9A-Z<]{6})',
      '(?<expire_date_check>[0-9A-Z<]{1})',
      '(?<personal_number>[A-Z0-9<]{14})',
      '(?<personal_number_check>[0-9A-Z<]{1})',
      '(?<linecheck>[0-9A-Z<]{1})',
    ].join(''),
  ),
];
|
||||
|
||||
/**
 * Turns raw MRZ field values into readable text: every `<` filler becomes a
 * space, each run of whitespace is collapsed to a single space, and the value
 * is trimmed.
 *
 * The object is updated in place and the same reference is returned.
 *
 * @param obj - Map of field name to raw field value.
 * @returns `obj`, with every value cleaned.
 */
function mrzCleanResult(obj: Record<string, string>) {
  for (const [key, value] of Object.entries(obj)) {
    obj[key] = value
      .replace(/</g, ' ')
      // Global flag: a value can hold several filler runs (surname, given
      // names, trailing padding) and each of them has to be collapsed.
      .replace(/\s{2,}/g, ' ')
      .trim();
  }

  return obj;
}
|
||||
|
||||
/**
 * Parses a TD1 zone by matching each line against the pattern at the same
 * index in `MRZ_TD_1` and merging the named groups of every match.
 *
 * Lines that do not match contribute nothing, and lines beyond the number of
 * known patterns are ignored instead of raising a TypeError.
 *
 * @param mrz - The zone to parse.
 * @returns The input zone together with the cleaned field map.
 */
export function parseType1(mrz: MRZ) {
  const result: Record<string, string> = {};

  mrz.zone.forEach((line, i) => {
    // One `exec` replaces the former `test` + `exec` pair; the optional
    // chaining covers both a missing pattern and a failed match.
    const groups = MRZ_TD_1[i]?.exec(line)?.groups;
    if (groups) {
      Object.assign(result, groups);
    }
  });

  return { mrz, result: mrzCleanResult(result) };
}
|
||||
|
||||
/**
 * Parses a TD2 zone by matching each line against the pattern at the same
 * index in `MRZ_TD_2` and merging the named groups of every match.
 *
 * Lines that do not match contribute nothing, and lines beyond the number of
 * known patterns are ignored instead of raising a TypeError.
 *
 * @param mrz - The zone to parse.
 * @returns The input zone together with the cleaned field map.
 */
export function parseType2(mrz: MRZ) {
  const result: Record<string, string> = {};

  mrz.zone.forEach((line, i) => {
    // One `exec` replaces the former `test` + `exec` pair; the optional
    // chaining covers both a missing pattern and a failed match.
    const groups = MRZ_TD_2[i]?.exec(line)?.groups;
    if (groups) {
      Object.assign(result, groups);
    }
  });

  return { mrz, result: mrzCleanResult(result) };
}
|
||||
|
||||
/**
 * Parses a TD3 zone by matching each line against the pattern at the same
 * index in `MRZ_TD_3` and merging the named groups of every match.
 *
 * Lines that do not match contribute nothing, and lines beyond the number of
 * known patterns are ignored instead of raising a TypeError.
 *
 * @param mrz - The zone to parse.
 * @returns The input zone together with the cleaned field map.
 */
export function parseType3(mrz: MRZ) {
  const result: Record<string, string> = {};

  mrz.zone.forEach((line, i) => {
    // One `exec` replaces the former `test` + `exec` pair; the optional
    // chaining covers both a missing pattern and a failed match.
    const groups = MRZ_TD_3[i]?.exec(line)?.groups;
    if (groups) {
      Object.assign(result, groups);
    }
  });

  return { mrz, result: mrzCleanResult(result) };
}
|
||||
|
||||
/**
 * Routes a zone to the parser that corresponds to its `type` tag.
 *
 * @param mrz - The zone to parse.
 * @returns The selected parser's output, or `null` when the tag is not one of
 *   `'TD1'`, `'TD2'` or `'TD3'`.
 */
export function parseMRZ(mrz: MRZ) {
  switch (mrz.type) {
    case 'TD1':
      return parseType1(mrz);
    case 'TD2':
      return parseType2(mrz);
    case 'TD3':
      return parseType3(mrz);
    default:
      return null;
  }
}
|
||||
38
src/utils/ocr.ts
Normal file
38
src/utils/ocr.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import { createWorker, ImageLike, RecognizeResult } from 'tesseract.js';
|
||||
|
||||
import { parseMRZ } from './mrz';
|
||||
|
||||
/**
 * Lazily created Tesseract worker shared by every `runOcr` call.
 *
 * The creation promise is cached rather than the resolved worker, so calls
 * that arrive while the worker is still being created reuse the same
 * initialisation instead of each spawning a worker of their own.
 */
let worker: Promise<Tesseract.Worker> | undefined;

/**
 * Makes sure the shared worker exists and, when a callback is supplied, runs
 * recognition on `image` and passes the result to it.
 *
 * Without a callback only the worker is prepared; `image` is not recognised.
 *
 * @param image - The image to recognise.
 * @param callback - Receives the recognition result; its return value becomes
 *   the resolved value of the returned promise.
 * @throws Rejects with whatever `createWorker`, `recognize` or `callback`
 *   throws. A failed worker creation is not cached, so the next call retries.
 */
export function runOcr(image: ImageLike): Promise<void>;
export function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback: T,
): Promise<ReturnType<T>>;
export async function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback?: T,
) {
  if (!worker) {
    const pending = createWorker(['ocrb', 'eng', 'khm'], 1, {
      langPath: '/ocr-data',
    });
    worker = pending;
    // Forget a failed creation so a later call can try again. The identity
    // check keeps a stale failure from clearing a newer attempt.
    pending.catch(() => {
      if (worker === pending) worker = undefined;
    });
  }

  const ready = await worker;

  if (callback) return callback(await ready.recognize(image));
}
|
||||
|
||||
/**
 * Picks the MRZ lines out of an OCR result and parses them.
 *
 * The recognised text is split on whitespace and only tokens containing a run
 * of at least 30 MRZ characters (`A-Z`, `0-9`, `<`) are kept. The layout is
 * then chosen from the number of kept lines and the width of the first one:
 * 3 lines of 30 is TD1, 2 lines of 36 is TD2, 2 lines of 44 is TD3.
 *
 * @param result - Recognition result whose `data.text` is inspected.
 * @returns The `parseMRZ` output for the detected layout, or `null` when the
 *   kept lines fit none of the layouts.
 */
export function parseResultMRZ(result: RecognizeResult) {
  const tokens = result.data.text.split(/[\s\r\n]+/);
  const zone = tokens.filter((v) => /[A-Z0-9<]{30,}/.test(v));

  // Width of the first kept line; undefined when nothing was kept.
  const width = zone[0]?.length;

  if (zone.length === 3) {
    return width === 30 ? parseMRZ({ type: 'TD1', zone }) : null;
  }

  if (zone.length === 2) {
    if (width === 36) return parseMRZ({ type: 'TD2', zone });
    if (width === 44) return parseMRZ({ type: 'TD3', zone });
  }

  return null;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue