diff --git a/package.json b/package.json index 8fbe4498..bbdecea4 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "quasar": "^2.16.9", "signature_pad": "^5.0.2", "socket.io-client": "^4.7.5", + "tesseract.js": "^5.1.1", "uuid": "^10.0.0", "vue": "^3.4.38", "vue-i18n": "^9.14.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index eaed4d8b..cca5b964 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -47,6 +47,9 @@ importers: socket.io-client: specifier: ^4.7.5 version: 4.7.5 + tesseract.js: + specifier: ^5.1.1 + version: 5.1.1 uuid: specifier: ^10.0.0 version: 10.0.0 @@ -1140,6 +1143,9 @@ packages: bluebird@3.7.2: resolution: {integrity: sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==} + bmp-js@0.1.0: + resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==} + bn.js@4.12.0: resolution: {integrity: sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==} @@ -1262,7 +1268,6 @@ packages: chokidar@2.1.8: resolution: {integrity: sha512-ZmZUazfOzf0Nve7duiCKD23PFSCs4JPoYyccjUFF3aQkQadqBhfzhjkwBH2mNOG9cTBwhamM37EIsIkZw3nRgg==} - deprecated: Chokidar 2 does not receive security updates since 2019. Upgrade to chokidar 3 with 15x fewer dependencies chokidar@3.6.0: resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==} @@ -1880,7 +1885,7 @@ packages: resolution: {integrity: sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==} engines: {node: '>= 4.0'} os: [darwin] - deprecated: The v1 package contains DANGEROUS / INSECURE binaries. Upgrade to safe fsevents v2 + deprecated: Upgrade to fsevents v2 to mitigate potential security issues fsevents@2.3.2: resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} @@ -2019,6 +2024,9 @@ packages: resolution: {integrity: sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==} engines: {node: '>=0.10.0'} + idb-keyval@6.2.1: + resolution: {integrity: sha512-8Sb3veuYCyrZL+VBt9LJfZjLUPWVvqn8tG28VqYNFCo43KHcKuq+b4EiXGeuaLAQWL2YmyDgMp2aSpH9JHsEQg==} + ieee754@1.2.1: resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} @@ -2097,6 +2105,9 @@ packages: engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} hasBin: true + is-electron@2.2.2: + resolution: {integrity: sha512-FO/Rhvz5tuw4MCWkpMzHFKWD2LsfHzIb7i6MdPYZ/KW7AlxawyLkqdy+jPZP1WubqEADE3O4FUENlJHDfQASRg==} + is-extendable@0.1.1: resolution: {integrity: sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==} engines: {node: '>=0.10.0'} @@ -2154,6 +2165,9 @@ packages: resolution: {integrity: sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==} engines: {node: '>=10'} + is-url@1.2.4: + resolution: {integrity: sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==} + is-windows@1.0.2: resolution: {integrity: sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==} engines: {node: '>=0.10.0'} @@ -2594,6 +2608,10 @@ packages: resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==} engines: {node: '>=12'} + opencollective-postinstall@2.0.3: + resolution: {integrity: sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==} + hasBin: true + optionator@0.9.3: resolution: {integrity: sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==} engines: {node: '>= 0.8.0'} @@ -2890,6 +2908,9 @@ packages: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: '>=8.10.0'} + regenerator-runtime@0.13.11: + resolution: {integrity: sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==} + regex-not@1.0.2: resolution: {integrity: sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A==} engines: {node: '>=0.10.0'} @@ -3255,6 +3276,12 @@ packages: engines: {node: '>=10'} hasBin: true + tesseract.js-core@5.1.1: + resolution: {integrity: sha512-KX3bYSU5iGcO1XJa+QGPbi+Zjo2qq6eBhNjSGR5E5q0JtzkoipJKOUQD7ph8kFyteCEfEQ0maWLu8MCXtvX5uQ==} + + tesseract.js@5.1.1: + resolution: {integrity: sha512-lzVl/Ar3P3zhpUT31NjqeCo1f+D5+YfpZ5J62eo2S14QNVOmHBTtbchHm/YAbOOOzCegFnKf4B3Qih9LuldcYQ==} + text-decoder@1.1.1: resolution: {integrity: sha512-8zll7REEv4GDD3x4/0pW+ppIxSNs7H1J10IKFZsuOMscumCdM2a+toDGLPA3T+1+fLBql4zbt5z83GEQGGV5VA==} @@ -3504,6 +3531,9 @@ packages: typescript: optional: true + wasm-feature-detect@1.8.0: + resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==} + watchpack-chokidar2@2.0.1: resolution: {integrity: sha512-nCFfBIPKr5Sh61s4LPpy1Wtfi0HE8isJ3d2Yb5/Ppw2P2B/3eVSEBjKfN0fmHJSK14+31KwMKmcrzs2GM4P0Ww==} @@ -3647,6 +3677,9 @@ packages: resolution: {integrity: sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==} engines: {node: '>= 14'} + zlibjs@0.3.1: + resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==} + snapshots: '@aashutoshrathi/word-wrap@1.2.6': {} @@ -4670,6 +4703,8 @@ snapshots: bluebird@3.7.2: {} + bmp-js@0.1.0: {} + bn.js@4.12.0: {} bn.js@5.2.1: {} @@ -5837,6 +5872,8 @@ snapshots: dependencies: safer-buffer: 2.1.2 + idb-keyval@6.2.1: {} + ieee754@1.2.1: {} iferr@0.1.5: {} @@ -5913,6 +5950,8 @@ snapshots: is-docker@3.0.0: {} + is-electron@2.2.2: {} + is-extendable@0.1.1: {} is-extendable@1.0.1: @@ -5954,6 +5993,8 @@ snapshots: is-unicode-supported@0.1.0: {} + is-url@1.2.4: {} + is-windows@1.0.2: {} is-wsl@1.1.0: {} @@ -6312,7 +6353,6 @@ snapshots: node-fetch@2.7.0: dependencies: whatwg-url: 5.0.0 - optional: true node-forge@1.3.1: {} @@ -6427,6 +6467,8 @@ snapshots: is-docker: 2.2.1 is-wsl: 2.2.0 + opencollective-postinstall@2.0.3: {} + optionator@0.9.3: dependencies: '@aashutoshrathi/word-wrap': 1.2.6 @@ -6738,6 +6780,8 @@ snapshots: dependencies: picomatch: 2.3.1 + regenerator-runtime@0.13.11: {} + regex-not@1.0.2: dependencies: extend-shallow: 3.0.2 @@ -7182,6 +7226,23 @@ snapshots: commander: 2.20.3 source-map-support: 0.5.21 + tesseract.js-core@5.1.1: {} + + tesseract.js@5.1.1: + dependencies: + bmp-js: 0.1.0 + idb-keyval: 6.2.1 + is-electron: 2.2.2 + is-url: 1.2.4 + node-fetch: 2.7.0 + opencollective-postinstall: 2.0.3 + regenerator-runtime: 0.13.11 + tesseract.js-core: 5.1.1 + wasm-feature-detect: 1.8.0 + zlibjs: 0.3.1 + transitivePeerDependencies: + - encoding + text-decoder@1.1.1: dependencies: b4a: 1.6.6 @@ -7227,8 +7288,7 @@ snapshots: toidentifier@1.0.1: {} - tr46@0.0.3: - optional: true + tr46@0.0.3: {} ts-api-utils@1.3.0(typescript@5.5.4): dependencies: @@ -7399,6 +7459,8 @@ snapshots: optionalDependencies: typescript: 5.5.4 + wasm-feature-detect@1.8.0: {} + watchpack-chokidar2@2.0.1: dependencies: chokidar: 2.1.8 @@ -7420,8 +7482,7 @@ snapshots: dependencies: defaults: 1.0.4 - webidl-conversions@3.0.1: - optional: true + webidl-conversions@3.0.1: {} webpack-merge@6.0.1: dependencies: @@ -7470,7 +7531,6 @@ snapshots: dependencies: tr46: 0.0.3 webidl-conversions: 3.0.1 - optional: true which@2.0.2: dependencies: @@ -7558,3 +7618,5 @@ snapshots: archiver-utils: 5.0.2 compress-commons: 6.0.2 readable-stream: 4.5.2 + + zlibjs@0.3.1: {} diff --git a/src/utils/mrz.ts b/src/utils/mrz.ts new file mode 100644 index 00000000..2a041933 --- /dev/null +++ b/src/utils/mrz.ts @@ -0,0 +1,135 @@ +type MRZ = { + type: 'TD1' | 'TD2' | 'TD3'; + zone: string[]; +}; + +const MRZ_TD_1 = [ + new RegExp( + [ + '(?[0-9A-Z<]{1})', + '(?[A-Z<]{1})', + '(?[0-9A-Z<]{3})', + '(?[0-9A-Z<]{9})', + '(?[0-9A-Z<]{1})', + '(?[0-9A-Z<]{15})', + ].join(''), + ), + new RegExp( + [ + '(?[0-9A-Z<]{6})', + '(?[0-9A-Z<]{1})', + '(?[mfMF<]{1})', + '(?[0-9A-Z<]{6})', + '(?[0-9A-Z<]{1})', + '(?[0-9A-Z<]{3})', + '(?[A-Z0-9<]{11})', + '(?[0-9A-Z<]{1})', + ].join(''), + ), + new RegExp(['(?[A-Z<]{30})'].join('')), +]; +const MRZ_TD_2 = [ + new RegExp( + [ + '(?[0-9A-Z<]{1})', + '(?[A-Z<]{1})', + '(?[0-9A-Z<]{3})', + '(?[A-Z<]{31})', + ].join(''), + ), + + new RegExp( + [ + '(?[0-9A-Z<]{9})', + '(?[0-9A-Z<]{1})', + '(?[0-9A-Z<]{3})', + '(?[0-9A-Z<]{6})', + '(?[0-9A-Z<]{1})', + '(?[mfMF]{1})', + '(?[0-9A-Z<]{6})', + '(?[0-9A-Z<]{1})', + '(?[A-Z0-9<]{7})', + '(?[0-9A-Z<]{1})', + ].join(''), + ), +]; + +const MRZ_TD_3 = [ + new RegExp( + [ + '(?[A-Z0-9<]{1})', + '(?[A-Z0-9<]{1})', + '(?[A-Z0-9]{3})', + '(?[A-Z0-9<]{39})', + ].join(''), + ), + + new RegExp( + [ + '(?[0-9A-Z<]{9})', + '(?[0-9A-Z<]{1})', + '(?[0-9A-Z<]{3})', + '(?[0-9A-Z<]{6})', + '(?[0-9A-Z<]{1})', + '(?[mfMF<]{1})', + '(?[0-9A-Z<]{6})', + '(?[0-9A-Z<]{1})', + '(?[A-Z0-9<]{14})', + '(?[0-9A-Z<]{1})', + '(?[0-9A-Z<]{1})', + ].join(''), + ), +]; + +function mrzCleanResult(obj: Record) { + Object.entries(obj).forEach(([k, v]) => { + obj[k] = v + .replace(/ = {}; + + mrz.zone.forEach((line, i) => { + if (MRZ_TD_1[i].test(line)) { + Object.assign(result, MRZ_TD_1[i].exec(line)?.groups); + } + }); + + return { mrz, result: mrzCleanResult(result) }; +} + +export function parseType2(mrz: MRZ) { + const result: Record = {}; + + mrz.zone.forEach((line, i) => { + if (MRZ_TD_2[i].test(line)) { + Object.assign(result, MRZ_TD_2[i].exec(line)?.groups); + } + }); + + return { mrz, result: mrzCleanResult(result) }; +} + +export function parseType3(mrz: MRZ) { + const result: Record = {}; + + mrz.zone.forEach((line, i) => { + if (MRZ_TD_3[i].test(line)) { + Object.assign(result, MRZ_TD_3[i].exec(line)?.groups); + } + }); + + return { mrz, result: mrzCleanResult(result) }; +} + +export function parseMRZ(mrz: MRZ) { + if (mrz.type === 'TD1') return parseType1(mrz); + if (mrz.type === 'TD2') return parseType2(mrz); + if (mrz.type === 'TD3') return parseType3(mrz); + return null; +} diff --git a/src/utils/ocr.ts b/src/utils/ocr.ts new file mode 100644 index 00000000..10487e65 --- /dev/null +++ b/src/utils/ocr.ts @@ -0,0 +1,38 @@ +import { createWorker, ImageLike, RecognizeResult } from 'tesseract.js'; + +import { parseMRZ } from './mrz'; + +let worker: Tesseract.Worker; + +export function runOcr(image: ImageLike): Promise; +export function runOcr void>( + image: ImageLike, + callback: T, +): Promise>; +export async function runOcr void>( + image: ImageLike, + callback?: T, +) { + if (!worker) { + worker = await createWorker(['ocrb', 'eng', 'khm'], 1, { + langPath: '/ocr-data', + }); + } + if (callback) return callback(await worker.recognize(image)); +} + +export function parseResultMRZ(result: RecognizeResult) { + const zone = result.data.text + .split(/[\s\r\n]+/) + .filter((v) => /[A-Z0-9<]{30,}/.test(v)); + + if (zone.length === 3 && zone[0].length === 30) { + return parseMRZ({ type: 'TD1', zone }); + } else if (zone.length === 2 && zone[0].length === 36) { + return parseMRZ({ type: 'TD2', zone }); + } else if (zone.length === 2 && zone[0].length === 44) { + return parseMRZ({ type: 'TD3', zone }); + } else { + return null; + } +}