diff --git a/src/services/htmlMarkdownUtils.test.ts b/src/services/htmlMarkdownUtils.test.ts new file mode 100644 index 0000000..3fd5eca --- /dev/null +++ b/src/services/htmlMarkdownUtils.test.ts @@ -0,0 +1,86 @@ +import { describe, it, expect } from 'vitest'; +import { markdownToHtmlNoImages } from './htmlMarkdownUtils'; + +describe('markdownToHtmlNoImages', () => { + it('moves caption into table when caption immediately precedes table', () => { + const markdown = `My Table + +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + // We expect the caption to be inside the table + expect(html).toMatch(/\s*
My Table<\/caption>/); + }); + + it('renders table correctly without caption', () => { + const markdown = ` +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).toMatch(//); + expect(html).not.toMatch(/ + +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).toMatch(/
/); + expect(html).toContain('Header'); + expect(html).toContain('Cell'); + }); + + it('moves caption with attributes into table', () => { + const markdown = `My Table
\s*'); + expect(html).toContain(''); + }); + + it('does not add an extra empty header row', () => { + const markdown = ` +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).not.toContain(''); + const thCount = (html.match(/'); + expect(html).not.toContain(''); + + // Markdown table should have scope="col" + expect(html).toContain(''); + }); +}); diff --git a/src/services/htmlMarkdownUtils.ts b/src/services/htmlMarkdownUtils.ts index abb905b..24209f5 100644 --- a/src/services/htmlMarkdownUtils.ts +++ b/src/services/htmlMarkdownUtils.ts @@ -47,6 +47,20 @@ marked.use( marked.use({ extensions: [mermaidExtension] }); +// We use a custom renderer instead of a regex replace because regex is too aggressive. +// It would add scope="col" to raw HTML tables (which we want to leave alone). +// The renderer only applies to markdown tables. +marked.use({ + renderer: { + tablecell({ text, header, align }) { + const type = header ? "th" : "td"; + const alignAttr = align ? ` align="${align}"` : ""; + const scopeAttr = header ? ' scope="col"' : ""; + return `<${type}${scopeAttr}${alignAttr}>${text}\n`; + }, + }, +}); + export function extractImageSources(htmlString: string) { const srcUrls = []; const regex = /]+src=["']?([^"'>]+)["']?/g; @@ -125,8 +139,21 @@ export function markdownToHTMLSafe({ } export function markdownToHtmlNoImages(markdownString: string) { - const clean = DOMPurify.sanitize( - marked.parse(markdownString, { async: false, pedantic: false, gfm: true }) - ).replaceAll(/>[^<>]*<\/math>/g, ">"); + const parsedHtml = marked.parse(markdownString, { + async: false, + pedantic: false, + gfm: true, + }) as string; + + // Move caption inside table + const htmlWithCaptionInTable = parsedHtml.replace( + /(]*>[\s\S]*?<\/caption>)\s*(]*>)/g, + "$2$1" + ); + + const clean = DOMPurify.sanitize(htmlWithCaptionInTable).replaceAll( + />[^<>]*<\/math>/g, + ">" + ); return clean; }
My Table<\/caption>/); + }); + + it('adds scope="col" to table headers', () => { + const markdown = ` +| Header 1 | Header 2 | +| --- | --- | +| Cell 1 | Cell 2 |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).toContain('
Header 1Header 2 { + const markdown = ` + + + + + + + + + + + +
Raw Header
Raw Cell
+ +| MD Header | +| --- | +| MD Cell |`; + const html = markdownToHtmlNoImages(markdown); + + // Raw table should be untouched (or at least not have scope="col" added if it wasn't there) + expect(html).toContain('
Raw HeaderRaw HeaderMD Header