feat: add ocr function

This commit is contained in:
Methapon Metanipat 2024-10-02 13:55:52 +07:00
parent adc3f8e068
commit 263d575955
4 changed files with 244 additions and 8 deletions

View file

@ -27,6 +27,7 @@
"quasar": "^2.16.9",
"signature_pad": "^5.0.2",
"socket.io-client": "^4.7.5",
"tesseract.js": "^5.1.1",
"uuid": "^10.0.0",
"vue": "^3.4.38",
"vue-i18n": "^9.14.0",

78
pnpm-lock.yaml generated
View file

@ -47,6 +47,9 @@ importers:
socket.io-client:
specifier: ^4.7.5
version: 4.7.5
tesseract.js:
specifier: ^5.1.1
version: 5.1.1
uuid:
specifier: ^10.0.0
version: 10.0.0
@ -1140,6 +1143,9 @@ packages:
bluebird@3.7.2:
resolution: {integrity: sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==}
bmp-js@0.1.0:
resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==}
bn.js@4.12.0:
resolution: {integrity: sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==}
@ -1262,7 +1268,6 @@ packages:
chokidar@2.1.8:
resolution: {integrity: sha512-ZmZUazfOzf0Nve7duiCKD23PFSCs4JPoYyccjUFF3aQkQadqBhfzhjkwBH2mNOG9cTBwhamM37EIsIkZw3nRgg==}
deprecated: Chokidar 2 does not receive security updates since 2019. Upgrade to chokidar 3 with 15x fewer dependencies
chokidar@3.6.0:
resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==}
@ -1880,7 +1885,7 @@ packages:
resolution: {integrity: sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==}
engines: {node: '>= 4.0'}
os: [darwin]
deprecated: The v1 package contains DANGEROUS / INSECURE binaries. Upgrade to safe fsevents v2
deprecated: Upgrade to fsevents v2 to mitigate potential security issues
fsevents@2.3.2:
resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==}
@ -2019,6 +2024,9 @@ packages:
resolution: {integrity: sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==}
engines: {node: '>=0.10.0'}
idb-keyval@6.2.1:
resolution: {integrity: sha512-8Sb3veuYCyrZL+VBt9LJfZjLUPWVvqn8tG28VqYNFCo43KHcKuq+b4EiXGeuaLAQWL2YmyDgMp2aSpH9JHsEQg==}
ieee754@1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
@ -2097,6 +2105,9 @@ packages:
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
hasBin: true
is-electron@2.2.2:
resolution: {integrity: sha512-FO/Rhvz5tuw4MCWkpMzHFKWD2LsfHzIb7i6MdPYZ/KW7AlxawyLkqdy+jPZP1WubqEADE3O4FUENlJHDfQASRg==}
is-extendable@0.1.1:
resolution: {integrity: sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==}
engines: {node: '>=0.10.0'}
@ -2154,6 +2165,9 @@ packages:
resolution: {integrity: sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==}
engines: {node: '>=10'}
is-url@1.2.4:
resolution: {integrity: sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==}
is-windows@1.0.2:
resolution: {integrity: sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==}
engines: {node: '>=0.10.0'}
@ -2594,6 +2608,10 @@ packages:
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
engines: {node: '>=12'}
opencollective-postinstall@2.0.3:
resolution: {integrity: sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==}
hasBin: true
optionator@0.9.3:
resolution: {integrity: sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==}
engines: {node: '>= 0.8.0'}
@ -2890,6 +2908,9 @@ packages:
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
engines: {node: '>=8.10.0'}
regenerator-runtime@0.13.11:
resolution: {integrity: sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==}
regex-not@1.0.2:
resolution: {integrity: sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A==}
engines: {node: '>=0.10.0'}
@ -3255,6 +3276,12 @@ packages:
engines: {node: '>=10'}
hasBin: true
tesseract.js-core@5.1.1:
resolution: {integrity: sha512-KX3bYSU5iGcO1XJa+QGPbi+Zjo2qq6eBhNjSGR5E5q0JtzkoipJKOUQD7ph8kFyteCEfEQ0maWLu8MCXtvX5uQ==}
tesseract.js@5.1.1:
resolution: {integrity: sha512-lzVl/Ar3P3zhpUT31NjqeCo1f+D5+YfpZ5J62eo2S14QNVOmHBTtbchHm/YAbOOOzCegFnKf4B3Qih9LuldcYQ==}
text-decoder@1.1.1:
resolution: {integrity: sha512-8zll7REEv4GDD3x4/0pW+ppIxSNs7H1J10IKFZsuOMscumCdM2a+toDGLPA3T+1+fLBql4zbt5z83GEQGGV5VA==}
@ -3504,6 +3531,9 @@ packages:
typescript:
optional: true
wasm-feature-detect@1.8.0:
resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==}
watchpack-chokidar2@2.0.1:
resolution: {integrity: sha512-nCFfBIPKr5Sh61s4LPpy1Wtfi0HE8isJ3d2Yb5/Ppw2P2B/3eVSEBjKfN0fmHJSK14+31KwMKmcrzs2GM4P0Ww==}
@ -3647,6 +3677,9 @@ packages:
resolution: {integrity: sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==}
engines: {node: '>= 14'}
zlibjs@0.3.1:
resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==}
snapshots:
'@aashutoshrathi/word-wrap@1.2.6': {}
@ -4670,6 +4703,8 @@ snapshots:
bluebird@3.7.2: {}
bmp-js@0.1.0: {}
bn.js@4.12.0: {}
bn.js@5.2.1: {}
@ -5837,6 +5872,8 @@ snapshots:
dependencies:
safer-buffer: 2.1.2
idb-keyval@6.2.1: {}
ieee754@1.2.1: {}
iferr@0.1.5: {}
@ -5913,6 +5950,8 @@ snapshots:
is-docker@3.0.0: {}
is-electron@2.2.2: {}
is-extendable@0.1.1: {}
is-extendable@1.0.1:
@ -5954,6 +5993,8 @@ snapshots:
is-unicode-supported@0.1.0: {}
is-url@1.2.4: {}
is-windows@1.0.2: {}
is-wsl@1.1.0: {}
@ -6312,7 +6353,6 @@ snapshots:
node-fetch@2.7.0:
dependencies:
whatwg-url: 5.0.0
optional: true
node-forge@1.3.1: {}
@ -6427,6 +6467,8 @@ snapshots:
is-docker: 2.2.1
is-wsl: 2.2.0
opencollective-postinstall@2.0.3: {}
optionator@0.9.3:
dependencies:
'@aashutoshrathi/word-wrap': 1.2.6
@ -6738,6 +6780,8 @@ snapshots:
dependencies:
picomatch: 2.3.1
regenerator-runtime@0.13.11: {}
regex-not@1.0.2:
dependencies:
extend-shallow: 3.0.2
@ -7182,6 +7226,23 @@ snapshots:
commander: 2.20.3
source-map-support: 0.5.21
tesseract.js-core@5.1.1: {}
tesseract.js@5.1.1:
dependencies:
bmp-js: 0.1.0
idb-keyval: 6.2.1
is-electron: 2.2.2
is-url: 1.2.4
node-fetch: 2.7.0
opencollective-postinstall: 2.0.3
regenerator-runtime: 0.13.11
tesseract.js-core: 5.1.1
wasm-feature-detect: 1.8.0
zlibjs: 0.3.1
transitivePeerDependencies:
- encoding
text-decoder@1.1.1:
dependencies:
b4a: 1.6.6
@ -7227,8 +7288,7 @@ snapshots:
toidentifier@1.0.1: {}
tr46@0.0.3:
optional: true
tr46@0.0.3: {}
ts-api-utils@1.3.0(typescript@5.5.4):
dependencies:
@ -7399,6 +7459,8 @@ snapshots:
optionalDependencies:
typescript: 5.5.4
wasm-feature-detect@1.8.0: {}
watchpack-chokidar2@2.0.1:
dependencies:
chokidar: 2.1.8
@ -7420,8 +7482,7 @@ snapshots:
dependencies:
defaults: 1.0.4
webidl-conversions@3.0.1:
optional: true
webidl-conversions@3.0.1: {}
webpack-merge@6.0.1:
dependencies:
@ -7470,7 +7531,6 @@ snapshots:
dependencies:
tr46: 0.0.3
webidl-conversions: 3.0.1
optional: true
which@2.0.2:
dependencies:
@ -7558,3 +7618,5 @@ snapshots:
archiver-utils: 5.0.2
compress-commons: 6.0.2
readable-stream: 4.5.2
zlibjs@0.3.1: {}

135
src/utils/mrz.ts Normal file
View file

@ -0,0 +1,135 @@
/**
 * A candidate machine-readable zone handed to the parsers in this file.
 *
 * - `type`: format tag; `parseMRZ` dispatches on it to choose the parser.
 * - `zone`: the zone's text lines in order; each parser matches line `i`
 *   against the i-th pattern of the corresponding format.
 */
type MRZ = {
type: 'TD1' | 'TD2' | 'TD3';
zone: string[];
};
/**
 * Line patterns for the three-line TD1 layout, in line order.
 *
 * Every field is a fixed-width named group and the widths of each pattern add
 * up to 30 characters. The patterns are not anchored, so they match the first
 * 30-character run of a line that fits.
 */
const MRZ_TD_1 = [
  // Line 1: document header and document number.
  new RegExp(
    '(?<doc_type>[0-9A-Z<]{1})' +
      '(?<doc_subtype>[A-Z<]{1})' +
      '(?<country>[0-9A-Z<]{3})' +
      '(?<doc_number>[0-9A-Z<]{9})' +
      '(?<doc_number_check>[0-9A-Z<]{1})' +
      '(?<complement>[0-9A-Z<]{15})',
  ),
  // Line 2: dates, sex, nationality and trailing check character.
  new RegExp(
    '(?<birth_date>[0-9A-Z<]{6})' +
      '(?<birth_date_check>[0-9A-Z<]{1})' +
      '(?<sex>[mfMF<]{1})' +
      '(?<expire_date>[0-9A-Z<]{6})' +
      '(?<expire_date_check>[0-9A-Z<]{1})' +
      '(?<nationality>[0-9A-Z<]{3})' +
      '(?<optional_data>[A-Z0-9<]{11})' +
      '(?<linecheck>[0-9A-Z<]{1})',
  ),
  // Line 3: holder name.
  new RegExp('(?<full_name>[A-Z<]{30})'),
];
/**
 * Line patterns for the two-line TD2 layout, in line order.
 *
 * Every field is a fixed-width named group and the widths of each pattern add
 * up to 36 characters. The patterns are not anchored.
 *
 * NOTE(review): the group names `doc_numbercheck` and `nacionality` are spelled
 * differently from the `doc_number_check` / `nationality` groups used by the
 * TD1 and TD3 patterns. They are kept as-is because they become keys of the
 * parsed result, which callers may already read.
 */
const MRZ_TD_2 = [
  // Line 1: document header and holder name.
  new RegExp(
    [
      '(?<doc_type>[0-9A-Z<]{1})',
      '(?<doc_subtype>[A-Z<]{1})',
      '(?<country>[0-9A-Z<]{3})',
      '(?<full_name>[A-Z<]{31})',
    ].join(''),
  ),
  // Line 2: document number, dates, sex and trailing check character.
  new RegExp(
    [
      '(?<doc_number>[0-9A-Z<]{9})',
      '(?<doc_numbercheck>[0-9A-Z<]{1})',
      '(?<nacionality>[0-9A-Z<]{3})',
      '(?<birth_date>[0-9A-Z<]{6})',
      '(?<birth_date_check>[0-9A-Z<]{1})',
      // `<` is the filler used when sex is unspecified; the TD1 and TD3
      // patterns accept it, so TD2 must as well or such lines never match.
      '(?<sex>[mfMF<]{1})',
      '(?<expire_date>[0-9A-Z<]{6})',
      '(?<expire_date_check>[0-9A-Z<]{1})',
      '(?<optional_data>[A-Z0-9<]{7})',
      '(?<line_check>[0-9A-Z<]{1})',
    ].join(''),
  ),
];
/**
 * Line patterns for the two-line TD3 layout, in line order.
 *
 * Every field is a fixed-width named group and the widths of each pattern add
 * up to 44 characters. The patterns are not anchored.
 */
const MRZ_TD_3 = [
  // Line 1: document header and holder name.
  new RegExp(
    [
      '(?<doc_type>[A-Z0-9<]{1})',
      '(?<doc_subtype>[A-Z0-9<]{1})',
      // `<` must be allowed: issuing-state codes shorter than three characters
      // are padded with the filler (e.g. `D<<`), as the TD1/TD2 patterns allow.
      '(?<country>[A-Z0-9<]{3})',
      '(?<full_name>[A-Z0-9<]{39})',
    ].join(''),
  ),
  // Line 2: document number, dates, sex, personal number and check characters.
  new RegExp(
    [
      '(?<doc_number>[0-9A-Z<]{9})',
      '(?<doc_number_check>[0-9A-Z<]{1})',
      '(?<nationality>[0-9A-Z<]{3})',
      '(?<birth_date>[0-9A-Z<]{6})',
      '(?<birth_date_check>[0-9A-Z<]{1})',
      '(?<sex>[mfMF<]{1})',
      '(?<expire_date>[0-9A-Z<]{6})',
      '(?<expire_date_check>[0-9A-Z<]{1})',
      '(?<personal_number>[A-Z0-9<]{14})',
      '(?<personal_number_check>[0-9A-Z<]{1})',
      '(?<linecheck>[0-9A-Z<]{1})',
    ].join(''),
  ),
];
/**
 * Normalises raw MRZ field values for display.
 *
 * Each value has every `<` filler turned into a space, every run of two or
 * more whitespace characters collapsed to a single space, and leading/trailing
 * whitespace removed.
 *
 * @param obj - Field name to raw value map; it is updated in place.
 * @returns The same `obj` instance, after clean-up.
 */
function mrzCleanResult(obj: Record<string, string>) {
  for (const [key, value] of Object.entries(obj)) {
    obj[key] = value
      .replace(/</g, ' ')
      // Global flag is required: without it only the first run of spaces is
      // collapsed and later gaps (e.g. between given names) stay doubled.
      .replace(/\s{2,}/g, ' ')
      .trim();
  }
  return obj;
}
/**
 * Extracts the TD1 fields from a zone.
 *
 * Line `i` of the zone is matched against `MRZ_TD_1[i]`; the named groups of
 * every matching line are merged into one record, which is then passed through
 * `mrzCleanResult`. Lines that do not match contribute nothing.
 *
 * @param mrz - The zone to parse.
 * @returns The input `mrz` together with the cleaned field record.
 */
export function parseType1(mrz: MRZ) {
  const result: Record<string, string> = {};
  mrz.zone.forEach((line, i) => {
    // `?.` on the pattern: a zone with more lines than TD1 defines must be
    // ignored past the last pattern instead of throwing on `undefined`.
    const groups = MRZ_TD_1[i]?.exec(line)?.groups;
    if (groups) Object.assign(result, groups);
  });
  return { mrz, result: mrzCleanResult(result) };
}
/**
 * Extracts the TD2 fields from a zone.
 *
 * Line `i` of the zone is matched against `MRZ_TD_2[i]`; the named groups of
 * every matching line are merged into one record, which is then passed through
 * `mrzCleanResult`. Lines that do not match contribute nothing.
 *
 * @param mrz - The zone to parse.
 * @returns The input `mrz` together with the cleaned field record.
 */
export function parseType2(mrz: MRZ) {
  const result: Record<string, string> = {};
  mrz.zone.forEach((line, i) => {
    // `?.` on the pattern: a zone with more lines than TD2 defines must be
    // ignored past the last pattern instead of throwing on `undefined`.
    const groups = MRZ_TD_2[i]?.exec(line)?.groups;
    if (groups) Object.assign(result, groups);
  });
  return { mrz, result: mrzCleanResult(result) };
}
/**
 * Extracts the TD3 fields from a zone.
 *
 * Line `i` of the zone is matched against `MRZ_TD_3[i]`; the named groups of
 * every matching line are merged into one record, which is then passed through
 * `mrzCleanResult`. Lines that do not match contribute nothing.
 *
 * @param mrz - The zone to parse.
 * @returns The input `mrz` together with the cleaned field record.
 */
export function parseType3(mrz: MRZ) {
  const result: Record<string, string> = {};
  mrz.zone.forEach((line, i) => {
    // `?.` on the pattern: a zone with more lines than TD3 defines must be
    // ignored past the last pattern instead of throwing on `undefined`.
    const groups = MRZ_TD_3[i]?.exec(line)?.groups;
    if (groups) Object.assign(result, groups);
  });
  return { mrz, result: mrzCleanResult(result) };
}
/**
 * Routes a zone to the parser that corresponds to its `type` tag.
 *
 * @param mrz - The zone to parse.
 * @returns The selected parser's output, or `null` when the tag is none of
 *   `'TD1'`, `'TD2'` or `'TD3'`.
 */
export function parseMRZ(mrz: MRZ) {
  switch (mrz.type) {
    case 'TD1':
      return parseType1(mrz);
    case 'TD2':
      return parseType2(mrz);
    case 'TD3':
      return parseType3(mrz);
    default:
      return null;
  }
}

38
src/utils/ocr.ts Normal file
View file

@ -0,0 +1,38 @@
import { createWorker, ImageLike, RecognizeResult } from 'tesseract.js';
import { parseMRZ } from './mrz';
/**
 * Pending or completed creation of the shared OCR worker.
 *
 * The promise is cached instead of the resolved worker, so calls that overlap
 * while the first worker is still being created all await the same one
 * and do not each create (and leak) their own.
 */
let workerPromise: Promise<Tesseract.Worker> | undefined;

/** Returns the shared worker, starting its creation on first use. */
function getWorker(): Promise<Tesseract.Worker> {
  if (!workerPromise) {
    workerPromise = createWorker(['ocrb', 'eng', 'khm'], 1, {
      langPath: '/ocr-data',
    }).catch((error: unknown) => {
      // Forget the failed attempt so that a later call can retry creation.
      workerPromise = undefined;
      throw error;
    });
  }
  return workerPromise;
}

/**
 * Ensures the shared OCR worker exists and, when a callback is given, runs
 * recognition on `image` and passes the result to it.
 *
 * Without a callback only the worker is initialised; no recognition is run and
 * the promise resolves to `undefined`.
 *
 * @param image - The image to recognise.
 * @param callback - Receives the recognition result; its return value becomes
 *   the resolved value of the returned promise.
 */
export function runOcr(image: ImageLike): Promise<void>;
export function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback: T,
): Promise<ReturnType<T>>;
export async function runOcr<T extends (result: RecognizeResult) => void>(
  image: ImageLike,
  callback?: T,
) {
  const worker = await getWorker();

  if (callback) return callback(await worker.recognize(image));
}
/**
 * Locates MRZ lines in recognised text and parses them.
 *
 * The text is split on whitespace and only tokens containing a run of at
 * least 30 characters from `A-Z`, `0-9` and `<` are kept. The number of kept
 * tokens and the length of the first one select the format: 3 tokens of
 * length 30 is TD1, 2 tokens of length 36 is TD2, 2 tokens of length 44 is
 * TD3.
 *
 * @param result - The recognition result whose `data.text` is scanned.
 * @returns The output of `parseMRZ` for the detected format, or `null` when
 *   no format fits.
 */
export function parseResultMRZ(result: RecognizeResult) {
  const tokens = result.data.text.split(/[\s\r\n]+/);
  const zone = tokens.filter((token) => /[A-Z0-9<]{30,}/.test(token));

  const lineCount = zone.length;
  const firstWidth = zone[0]?.length;

  if (lineCount === 3) {
    return firstWidth === 30 ? parseMRZ({ type: 'TD1', zone }) : null;
  }

  if (lineCount === 2) {
    if (firstWidth === 36) return parseMRZ({ type: 'TD2', zone });
    if (firstWidth === 44) return parseMRZ({ type: 'TD3', zone });
  }

  return null;
}