mirror of
https://github.com/alexmickelson/canvasManagement.git
synced 2026-03-26 07:38:33 -06:00
improving replace url features
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
// Matches a whole <script>…</script> element. `[\s\S]` is used rather than `.`
// because `.` does not match line terminators, which left scripts whose body
// spans several lines untouched.
const scriptRegex = /<script[\s\S]*?<\/script>/g;
// Matches a <link> tag whose first attribute is a double-quoted `rel`
// immediately followed by a double-quoted `href`.
const linkTagRegex = /<link\s+rel="[^"]*"\s+href="[^"]*"[^>]*>/g;
// Matches any attribute (with its leading whitespace) whose value is
// double-quoted, single-quoted or unquoted.
const htmlAttribute = /\s+\w+="[^"]*"|\s+\w+='[^']*'|\s+\w+=[^\s>]+/g;
// Same three value forms as `htmlAttribute`, but the negative lookahead skips
// any attribute named `href`, so link targets are left in place.
const nonHrefAttribute =
  /\s+(?!href\s*=)\w+="[^"]*"|\s+(?!href\s*=)\w+='[^']*'|\s+(?!href\s*=)\w+=[^\s>]+/g;
|
||||
|
||||
function replaceUnicodeEscapes(input: string) {
|
||||
return input.replace(/\\u[\dA-Fa-f]{4}/g, (match) => {
|
||||
@@ -14,7 +15,7 @@ export const removeHtmlDetails = (html: string) => {
|
||||
return withoutUnicode
|
||||
.replaceAll(scriptRegex, "")
|
||||
.replaceAll(linkTagRegex, "")
|
||||
.replaceAll(htmlAttribute, "")
|
||||
.replaceAll(nonHrefAttribute, "")
|
||||
.replaceAll(/\\"/g, '"')
|
||||
.replaceAll(/\s/g, "")
|
||||
.replaceAll(/<hr\s*\/?>/g, "<hr>")
|
||||
@@ -32,5 +33,45 @@ export const removeHtmlDetails = (html: string) => {
|
||||
/**
 * Decides whether two HTML strings are equal once both have been passed
 * through `removeHtmlDetails`.
 *
 * When the normalized strings differ, a diagnostic report is written with
 * `console.log`: both lengths, the index of the first mismatch together with
 * a window of surrounding characters from each string (the mismatching
 * character is wrapped in brackets), and up to the first 10 differing
 * positions. A position past the end of a string is displayed as "∅".
 *
 * @param html1 - First HTML string to compare.
 * @param html2 - Second HTML string to compare.
 * @returns `true` when the normalized strings are identical, otherwise `false`.
 */
export const htmlIsCloseEnough = (html1: string, html2: string) => {
  const simple1 = removeHtmlDetails(html1);
  const simple2 = removeHtmlDetails(html2);

  if (simple1 === simple2) return true;

  const missing = "∅"; // display-only placeholder for "no character here"
  const maxDiffs = 10;
  const ctx = 30;

  const len1 = simple1.length;
  const len2 = simple2.length;
  const maxLen = Math.max(len1, len2);

  const diffs: Array<{ index: number; a: string; b: string }> = [];
  for (let i = 0; i < maxLen && diffs.length < maxDiffs; i++) {
    // Compare the raw characters (undefined past the end of a string) and only
    // substitute the placeholder for display. Substituting before comparing
    // made a literal "∅" look equal to "past the end", hiding the mismatch.
    const a: string | undefined = simple1[i];
    const b: string | undefined = simple2[i];
    if (a !== b) {
      diffs.push({ index: i, a: a ?? missing, b: b ?? missing });
    }
  }

  // Renders the window around `idx` with the character at `idx` in brackets.
  const mark = (s: string, idx: number) => {
    const from = Math.max(0, idx - ctx);
    const to = Math.min(s.length, idx + ctx);
    return `${s.slice(from, idx)}[${s[idx] ?? missing}]${s.slice(idx + 1, to)}`;
  };

  console.log("htmlIsCloseEnough: differences detected");
  console.log(`len1=${len1}, len2=${len2}`);
  const first = diffs[0];
  if (first !== undefined) {
    console.log(`firstDiffAt=${first.index}`);
    console.log("s1:", mark(simple1, first.index));
    console.log("s2:", mark(simple2, first.index));
  }
  console.log("first 10 diffs:", diffs);

  return false;
};
|
||||
|
||||
Reference in New Issue
Block a user