Merge pull request #17 from teichert/table-scope-col

(ADA) markdown table header cells are column scoped
This commit is contained in:
2026-01-15 13:46:22 -07:00
committed by GitHub
2 changed files with 116 additions and 3 deletions

View File

@@ -0,0 +1,86 @@
import { describe, it, expect } from 'vitest';
import { markdownToHtmlNoImages } from './htmlMarkdownUtils';
/**
 * Behavioural tests for `markdownToHtmlNoImages`:
 *  - a `<caption>` written above a markdown table is moved inside the `<table>`;
 *  - header cells of markdown tables get `scope="col"`;
 *  - `<th>` cells of raw HTML tables are left untouched.
 *
 * The markdown fixtures are template literals, so their lines deliberately
 * start at column 0. A raw HTML block (`<caption>`, `<table>`) only ends at a
 * blank line; a pipe table that follows one without a blank line is treated as
 * part of the HTML block and is never parsed as a table. The fixtures
 * therefore keep a blank line between raw HTML and the markdown table.
 */
describe('markdownToHtmlNoImages', () => {
  it('moves caption into table when caption immediately precedes table', () => {
    // Blank line after the caption terminates the HTML block so the table parses.
    const markdown = `<caption>My Table</caption>

| Header |
| --- |
| Cell |`;
    const html = markdownToHtmlNoImages(markdown);
    // We expect the caption to be inside the table
    expect(html).toMatch(/<table>\s*<caption>My Table<\/caption>/);
  });

  it('renders table correctly without caption', () => {
    const markdown = `
| Header |
| --- |
| Cell |`;
    const html = markdownToHtmlNoImages(markdown);
    expect(html).toMatch(/<table>/);
    expect(html).not.toMatch(/<caption>/);
    expect(html).toContain('Header');
    expect(html).toContain('Cell');
  });

  it('moves caption with attributes into table', () => {
    // Blank line after the caption terminates the HTML block so the table parses.
    const markdown = `<caption style="color:red">My Table</caption>

| Header |
| --- |
| Cell |`;
    const html = markdownToHtmlNoImages(markdown);
    expect(html).toMatch(/<table>\s*<caption style="color:red">My Table<\/caption>/);
  });

  it('adds scope="col" to table headers', () => {
    const markdown = `
| Header 1 | Header 2 |
| --- | --- |
| Cell 1 | Cell 2 |`;
    const html = markdownToHtmlNoImages(markdown);
    expect(html).toContain('<th scope="col">Header 1</th>');
    expect(html).toContain('<th scope="col">Header 2</th>');
  });

  it('does not add an extra empty header row', () => {
    const markdown = `
| Header |
| --- |
| Cell |`;
    const html = markdownToHtmlNoImages(markdown);
    expect(html).not.toContain('<th scope="col"></th>');
    // Exactly one header cell is expected for a single-column table.
    const thCount = (html.match(/<th scope="col"/g) || []).length;
    expect(thCount).toBe(1);
  });

  it('does not add scope="col" to raw HTML tables', () => {
    // The blank line after </table> ends the raw HTML block; without it the
    // markdown table below would be swallowed into the HTML block.
    const markdown = `
<table>
<thead>
<tr>
<th>Raw Header</th>
</tr>
</thead>
<tbody>
<tr>
<td>Raw Cell</td>
</tr>
</tbody>
</table>

| MD Header |
| --- |
| MD Cell |`;
    const html = markdownToHtmlNoImages(markdown);
    // Raw table should be untouched (or at least not have scope="col" added if it wasn't there)
    expect(html).toContain('<th>Raw Header</th>');
    expect(html).not.toContain('<th scope="col">Raw Header</th>');
    // Markdown table should have scope="col"
    expect(html).toContain('<th scope="col">MD Header</th>');
  });
});

View File

@@ -47,6 +47,20 @@ marked.use(
// Register the mermaid extension with marked (once; a second identical
// registration on the same line was redundant).
marked.use({ extensions: [mermaidExtension] });
// Accessibility: header cells of markdown tables are emitted with scope="col".
// We use a custom renderer instead of a regex replace because regex is too aggressive:
// it would add scope="col" to raw HTML tables (which we want to leave alone).
// The renderer only applies to markdown tables.
marked.use({
  renderer: {
    // Must stay a method (not an arrow function) so `this` is the marked renderer.
    tablecell({ tokens, header, align }) {
      // Render the cell's inline tokens. The token's `text` field is the raw,
      // unparsed markdown, so emitting it directly would leave inline markup
      // such as **bold**, `code` or [links](…) unrendered inside table cells.
      const content = this.parser.parseInline(tokens);
      const tag = header ? "th" : "td";
      const scopeAttr = header ? ' scope="col"' : "";
      const alignAttr = align ? ` align="${align}"` : "";
      return `<${tag}${scopeAttr}${alignAttr}>${content}</${tag}>\n`;
    },
  },
});
export function extractImageSources(htmlString: string) { export function extractImageSources(htmlString: string) {
const srcUrls = []; const srcUrls = [];
const regex = /<img[^>]+src=["']?([^"'>]+)["']?/g; const regex = /<img[^>]+src=["']?([^"'>]+)["']?/g;
@@ -125,8 +139,21 @@ export function markdownToHTMLSafe({
} }
/**
 * Converts a markdown string to sanitized HTML.
 *
 * Steps, in order:
 *  1. Parse synchronously with marked (GFM on, pedantic off).
 *  2. Move every `<caption>…</caption>` that is directly followed (ignoring
 *     whitespace) by a `<table …>` opening tag to just inside that table.
 *  3. Sanitize the result with DOMPurify.
 *  4. Remove any text run that sits directly before a closing `</math>` tag.
 *
 * @param markdownString - Markdown source to convert.
 * @returns The sanitized HTML string.
 */
export function markdownToHtmlNoImages(markdownString: string): string {
  const parsedHtml = marked.parse(markdownString, {
    async: false,
    pedantic: false,
    gfm: true,
  }) as string;

  // Move caption inside table.
  // The caption body is matched with a pattern that cannot run past
  // `</caption>`. A plain lazy `[\s\S]*?` would, on backtracking, stretch from
  // an earlier stray caption across unrelated content up to a later
  // caption+table pair and drag all of it into the table.
  const htmlWithCaptionInTable = parsedHtml.replace(
    /(<caption[^>]*>(?:(?!<\/caption>)[\s\S])*<\/caption>)\s*(<table[^>]*>)/g,
    "$2$1"
  );

  // With a global regex, `replace` substitutes every match (same result as `replaceAll`).
  const clean = DOMPurify.sanitize(htmlWithCaptionInTable).replace(
    />[^<>]*<\/math>/g,
    "></math>"
  );
  return clean;
}