67 lines
3 KiB
JavaScript
67 lines
3 KiB
JavaScript
"use strict";
|
|
var __extends = (this && this.__extends) || (function () {
|
|
var extendStatics = function (d, b) {
|
|
extendStatics = Object.setPrototypeOf ||
|
|
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
|
|
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
|
|
return extendStatics(d, b);
|
|
};
|
|
return function (d, b) {
|
|
if (typeof b !== "function" && b !== null)
|
|
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
|
|
extendStatics(d, b);
|
|
function __() { this.constructor = d; }
|
|
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
};
|
|
})();
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.sentenceDiff = void 0;
|
|
exports.diffSentences = diffSentences;
|
|
var base_js_1 = require("./base.js");
|
|
function isSentenceEndPunct(char) {
|
|
return char == '.' || char == '!' || char == '?';
|
|
}
|
|
var SentenceDiff = /** @class */ (function (_super) {
|
|
__extends(SentenceDiff, _super);
|
|
function SentenceDiff() {
|
|
return _super !== null && _super.apply(this, arguments) || this;
|
|
}
|
|
SentenceDiff.prototype.tokenize = function (value) {
|
|
var _a;
|
|
// If in future we drop support for environments that don't support lookbehinds, we can replace
|
|
// this entire function with:
|
|
// return value.split(/(?<=[.!?])(\s+|$)/);
|
|
// but until then, for similar reasons to the trailingWs function in string.ts, we are forced
|
|
// to do this verbosely "by hand" instead of using a regex.
|
|
var result = [];
|
|
var tokenStartI = 0;
|
|
for (var i = 0; i < value.length; i++) {
|
|
if (i == value.length - 1) {
|
|
result.push(value.slice(tokenStartI));
|
|
break;
|
|
}
|
|
if (isSentenceEndPunct(value[i]) && value[i + 1].match(/\s/)) {
|
|
// We've hit a sentence break - i.e. a punctuation mark followed by whitespace.
|
|
// We now want to push TWO tokens to the result:
|
|
// 1. the sentence
|
|
result.push(value.slice(tokenStartI, i + 1));
|
|
// 2. the whitespace
|
|
i = tokenStartI = i + 1;
|
|
while ((_a = value[i + 1]) === null || _a === void 0 ? void 0 : _a.match(/\s/)) {
|
|
i++;
|
|
}
|
|
result.push(value.slice(tokenStartI, i + 1));
|
|
// Then the next token (a sentence) starts on the character after the whitespace.
|
|
// (It's okay if this is off the end of the string - then the outer loop will terminate
|
|
// here anyway.)
|
|
tokenStartI = i + 1;
|
|
}
|
|
}
|
|
return result;
|
|
};
|
|
return SentenceDiff;
|
|
}(base_js_1.default));
|
|
exports.sentenceDiff = new SentenceDiff();
|
|
function diffSentences(oldStr, newStr, options) {
|
|
return exports.sentenceDiff.diff(oldStr, newStr, options);
|
|
}
|