From e07d0a6e476f6fd6b953305f043c637da33e0401 Mon Sep 17 00:00:00 2001 From: Adam Teichert Date: Mon, 15 Dec 2025 23:01:35 -0700 Subject: [PATCH 1/2] caption element before table is placed as first element of the table (allows markdown tables to have caption) --- src/services/htmlMarkdownUtils.test.ts | 37 ++++++++++++++++++++++++++ src/services/htmlMarkdownUtils.ts | 19 ++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 src/services/htmlMarkdownUtils.test.ts diff --git a/src/services/htmlMarkdownUtils.test.ts b/src/services/htmlMarkdownUtils.test.ts new file mode 100644 index 0000000..e466c35 --- /dev/null +++ b/src/services/htmlMarkdownUtils.test.ts @@ -0,0 +1,37 @@ +import { describe, it, expect } from 'vitest'; +import { markdownToHtmlNoImages } from './htmlMarkdownUtils'; + +describe('markdownToHtmlNoImages', () => { + it('moves caption into table when caption immediately precedes table', () => { + const markdown = `My Table + +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + // We expect the caption to be inside the table + expect(html).toMatch(/\s*
My Table<\/caption>/); + }); + + it('renders table correctly without caption', () => { + const markdown = ` +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).toMatch(//); + expect(html).not.toMatch(/ + +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).toMatch(/
/); + expect(html).toContain('Header'); + expect(html).toContain('Cell'); + }); + + it('moves caption with attributes into table', () => { + const markdown = `My Table
\s*
My Table<\/caption>/); + }); +}); diff --git a/src/services/htmlMarkdownUtils.ts b/src/services/htmlMarkdownUtils.ts index abb905b..b53ef5d 100644 --- a/src/services/htmlMarkdownUtils.ts +++ b/src/services/htmlMarkdownUtils.ts @@ -125,8 +125,21 @@ export function markdownToHTMLSafe({ } export function markdownToHtmlNoImages(markdownString: string) { - const clean = DOMPurify.sanitize( - marked.parse(markdownString, { async: false, pedantic: false, gfm: true }) - ).replaceAll(/>[^<>]*<\/math>/g, ">"); + const parsedHtml = marked.parse(markdownString, { + async: false, + pedantic: false, + gfm: true, + }) as string; + + // Move caption inside table + const htmlWithCaptionInTable = parsedHtml.replace( + /(]*>[\s\S]*?<\/caption>)\s*(]*>)/g, + "$2$1" + ); + + const clean = DOMPurify.sanitize(htmlWithCaptionInTable).replaceAll( + />[^<>]*<\/math>/g, + ">" + ); return clean; } From e9f33e01740542aca05875555ef84872684eb045 Mon Sep 17 00:00:00 2001 From: Adam Teichert Date: Tue, 16 Dec 2025 13:02:24 -0700 Subject: [PATCH 2/2] markdown tables headings have scope of col; raw html tables are not changed --- src/services/htmlMarkdownUtils.test.ts | 49 ++++++++++++++++++++++++++ src/services/htmlMarkdownUtils.ts | 14 ++++++++ 2 files changed, 63 insertions(+) diff --git a/src/services/htmlMarkdownUtils.test.ts b/src/services/htmlMarkdownUtils.test.ts index e466c35..3fd5eca 100644 --- a/src/services/htmlMarkdownUtils.test.ts +++ b/src/services/htmlMarkdownUtils.test.ts @@ -34,4 +34,53 @@ describe('markdownToHtmlNoImages', () => { const html = markdownToHtmlNoImages(markdown); expect(html).toMatch(/\s*'); + expect(html).toContain(''); + }); + + it('does not add an extra empty header row', () => { + const markdown = ` +| Header | +| --- | +| Cell |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).not.toContain(''); + const thCount = (html.match(/'); + expect(html).not.toContain(''); + + // Markdown table should have scope="col" + expect(html).toContain(''); + }); }); diff --git a/src/services/htmlMarkdownUtils.ts b/src/services/htmlMarkdownUtils.ts index b53ef5d..24209f5 100644 --- a/src/services/htmlMarkdownUtils.ts +++ b/src/services/htmlMarkdownUtils.ts @@ -47,6 +47,20 @@ marked.use( marked.use({ extensions: [mermaidExtension] }); +// We use a custom renderer instead of a regex replace because regex is too aggressive. +// It would add scope="col" to raw HTML tables (which we want to leave alone). +// The renderer only applies to markdown tables. +marked.use({ + renderer: { + tablecell({ text, header, align }) { + const type = header ? "th" : "td"; + const alignAttr = align ? ` align="${align}"` : ""; + const scopeAttr = header ? ' scope="col"' : ""; + return `<${type}${scopeAttr}${alignAttr}>${text}\n`; + }, + }, +}); + export function extractImageSources(htmlString: string) { const srcUrls = []; const regex = /]+src=["']?([^"'>]+)["']?/g;
My Table<\/caption>/); }); + + it('adds scope="col" to table headers', () => { + const markdown = ` +| Header 1 | Header 2 | +| --- | --- | +| Cell 1 | Cell 2 |`; + const html = markdownToHtmlNoImages(markdown); + expect(html).toContain('
Header 1Header 2 { + const markdown = ` + + + + + + + + + + + +
Raw Header
Raw Cell
+ +| MD Header | +| --- | +| MD Cell |`; + const html = markdownToHtmlNoImages(markdown); + + // Raw table should be untouched (or at least not have scope="col" added if it wasn't there) + expect(html).toContain('
Raw HeaderRaw HeaderMD Header