diff --git a/src/services/htmlMarkdownUtils.test.ts b/src/services/htmlMarkdownUtils.test.ts
new file mode 100644
index 0000000..3fd5eca
--- /dev/null
+++ b/src/services/htmlMarkdownUtils.test.ts
@@ -0,0 +1,86 @@
+import { describe, it, expect } from 'vitest';
+import { markdownToHtmlNoImages } from './htmlMarkdownUtils';
+
+// Covers the table handling of markdownToHtmlNoImages: relocating a preceding
+// <caption> into the table and adding scope="col" to markdown-generated headers.
+describe('markdownToHtmlNoImages', () => {
+  it('moves caption into table when caption immediately precedes table', () => {
+    const markdown = `
+<caption>My Table</caption>
+
+| Header |
+| --- |
+| Cell |`;
+    const html = markdownToHtmlNoImages(markdown);
+    // We expect the caption to be inside the table
+    expect(html).toMatch(/<table>\s*<caption>My Table<\/caption>/);
+  });
+
+  it('renders table correctly without caption', () => {
+    const markdown = `
+| Header |
+| --- |
+| Cell |`;
+    const html = markdownToHtmlNoImages(markdown);
+    expect(html).toMatch(/<table>/);
+    expect(html).not.toMatch(/<caption>/);
+    expect(html).toContain('Header');
+    expect(html).toContain('Cell');
+  });
+
+  it('moves caption with attributes into table', () => {
+    const markdown = `<caption class="table-caption">My Table</caption>
+
+| Header |
+| --- |
+| Cell |`;
+    const html = markdownToHtmlNoImages(markdown);
+    expect(html).toMatch(
+      /<table>\s*<caption class="table-caption">My Table<\/caption>/
+    );
+  });
+
+  it('adds scope="col" to table headers', () => {
+    const markdown = `
+| Header 1 | Header 2 |
+| --- | --- |
+| Cell 1 | Cell 2 |`;
+    const html = markdownToHtmlNoImages(markdown);
+    expect(html).toContain('<th scope="col">Header 1</th>');
+    expect(html).toContain('<th scope="col">Header 2</th>');
+  });
+
+  it('does not add an extra empty header row', () => {
+    const markdown = `
+| Header |
+| --- |
+| Cell |`;
+    const html = markdownToHtmlNoImages(markdown);
+    // No header cell without content, with or without attributes.
+    expect(html).not.toMatch(/<th[^>]*>\s*<\/th>/);
+    // Count <th ...> cells only; the character class keeps <thead> out of the tally.
+    const thCount = (html.match(/<th[\s>]/g) ?? []).length;
+    expect(thCount).toBe(1);
+  });
+
+  it('does not add scope to raw HTML tables', () => {
+    const markdown = `
+<table>
+  <thead>
+    <tr>
+      <th>Raw Header</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Raw Cell</td>
+    </tr>
+  </tbody>
+</table>
+
+| MD Header |
+| --- |
+| MD Cell |`;
+    const html = markdownToHtmlNoImages(markdown);
+
+    // Raw table should be untouched (or at least not have scope="col" added if it wasn't there)
+    expect(html).toContain('<th>Raw Header</th>');
+    expect(html).not.toContain('<th scope="col">Raw Header</th>');
+
+    // Markdown table should have scope="col"
+    expect(html).toContain('<th scope="col">MD Header</th>');
+  });
+});
diff --git a/src/services/htmlMarkdownUtils.ts b/src/services/htmlMarkdownUtils.ts
index abb905b..24209f5 100644
--- a/src/services/htmlMarkdownUtils.ts
+++ b/src/services/htmlMarkdownUtils.ts
@@ -47,6 +47,20 @@ marked.use(
marked.use({ extensions: [mermaidExtension] });
+// We use a custom renderer instead of a regex replace because regex is too aggressive.
+// It would add scope="col" to raw HTML tables (which we want to leave alone).
+// The renderer only applies to markdown tables.
+marked.use({
+  renderer: {
+    // Renders one table cell: <th scope="col"> for header cells, <td> otherwise,
+    // carrying the column alignment (when set) as an align attribute.
+    tablecell({ tokens, header, align }) {
+      const type = header ? "th" : "td";
+      const alignAttr = align ? ` align="${align}"` : "";
+      const scopeAttr = header ? ' scope="col"' : "";
+      // Render the cell's inline tokens instead of echoing the raw cell text, so
+      // emphasis, links and code spans inside a cell are converted to HTML.
+      const content = this.parser.parseInline(tokens);
+      return `<${type}${scopeAttr}${alignAttr}>${content}</${type}>\n`;
+    },
+  },
+});
+
export function extractImageSources(htmlString: string) {
const srcUrls = [];
const regex = /
]+src=["']?([^"'>]+)["']?/g;
@@ -125,8 +139,21 @@ export function markdownToHTMLSafe({
}
+/**
+ * Converts a markdown string to sanitized HTML using marked (synchronous, GFM,
+ * non-pedantic) followed by DOMPurify.
+ *
+ * A <caption> element that directly precedes a <table> in the parsed HTML is
+ * moved inside that table before the markup is sanitized.
+ *
+ * @param markdownString Markdown source to convert.
+ * @returns The sanitized HTML string.
+ */
export function markdownToHtmlNoImages(markdownString: string) {
- const clean = DOMPurify.sanitize(
- marked.parse(markdownString, { async: false, pedantic: false, gfm: true })
- ).replaceAll(/>[^<>]*<\/math>/g, ">");
+  const parsedHtml = marked.parse(markdownString, {
+    async: false,
+    pedantic: false,
+    gfm: true,
+  }) as string;
+
+  // Move caption inside table: capture the whole <caption>…</caption> element and
+  // the opening <table …> tag that follows it (whitespace only in between), then
+  // emit them in swapped order so the caption becomes the table's first child.
+  const htmlWithCaptionInTable = parsedHtml.replace(
+    /(<caption[^>]*>[\s\S]*?<\/caption>)\s*(<table[^>]*>)/g,
+    "$2$1"
+  );
+
+  // NOTE(review): the </math> clean-up pattern below is carried over unchanged
+  // from the previous implementation — confirm it against the upstream file.
+  const clean = DOMPurify.sanitize(htmlWithCaptionInTable).replaceAll(
+    />[^<>]*<\/math>/g,
+    ">"
+  );
return clean;
}