feat: HTML rendering in markdown files #7

This commit is contained in:
Stefan Hausotte 2026-03-09 20:15:24 +01:00
parent d51ac33c71
commit 79742e761d
16 changed files with 3893 additions and 8 deletions

View file

@ -0,0 +1,17 @@
import Foundation
/// Loader for text resources (scripts, stylesheets) shipped inside the app bundle.
enum BundledWebResources {
    /// Returns the UTF-8 contents of the bundled file named `filename`.
    ///
    /// The name is split at its last path extension before the bundle lookup, so
    /// `"marked.min.js"` is looked up as resource `marked.min` with extension `js`.
    /// A missing or unreadable resource terminates the process via `fatalError`.
    ///
    /// - Parameter filename: File name including its extension.
    /// - Returns: The file's contents decoded as UTF-8.
    static func contents(of filename: String) -> String {
        let bridged = filename as NSString
        let resourceURL = Bundle.main.url(
            forResource: bridged.deletingPathExtension,
            withExtension: bridged.pathExtension
        )
        guard let resourceURL else {
            fatalError("Missing bundled web resource: \(filename)")
        }
        do {
            return try String(contentsOf: resourceURL, encoding: .utf8)
        } catch {
            fatalError("Failed to read bundled web resource \(filename): \(error)")
        }
    }
}

View file

@ -3,16 +3,27 @@ import Foundation
/// One piece of a markdown document after it has been split for rendering.
enum MarkdownSegment: Equatable {
/// Markdown source that contains neither a mermaid fence nor detected HTML.
case text(String)
/// A mermaid diagram. NOTE(review): presumably the body of a mermaid code fence without the fence lines; confirm in `MermaidParser`.
case mermaid(String)
/// Markdown source in which `HTMLParser` detected raw HTML, with surrounding whitespace trimmed.
case html(String)
}
enum MermaidParser {
private static let regex: NSRegularExpression? = {
private static let mermaidRegex: NSRegularExpression? = {
let pattern = #"(?m)^```mermaid[ \t]*\n([\s\S]*?)^```[ \t]*$"#
return try? NSRegularExpression(pattern: pattern, options: .anchorsMatchLines)
}()
static func parse(_ markdown: String) -> [MarkdownSegment] {
guard let regex else {
let mermaidSegments = extractMermaid(markdown)
return mermaidSegments.flatMap { segment -> [MarkdownSegment] in
if case let .text(text) = segment {
return HTMLParser.extractHTML(from: text)
}
return [segment]
}
}
private static func extractMermaid(_ markdown: String) -> [MarkdownSegment] {
guard let regex = mermaidRegex else {
return [.text(markdown)]
}
@ -58,3 +69,197 @@ enum MermaidParser {
return segments
}
}
/// Splits markdown into plain-markdown and raw-HTML segments using regex heuristics.
///
/// Two kinds of HTML are recognised:
/// - a block-level element (`blockTags`) spanning from its opening tag to a matching
///   closing tag that is followed only by spaces/tabs up to the end of the line, and
/// - any paragraph (text between blank lines) that contains an opening or closing tag
///   from `inlineTags` or `blockTags`.
///
/// Fenced code blocks and inline code spans are blanked out before detection, so HTML
/// shown as code is never extracted.
enum HTMLParser {
    /// Elements whose whole `<tag …>…</tag>` span is lifted out as one HTML range.
    private static let blockTags = [
        "details", "table", "div", "center", "picture", "figure", "dl", "section",
    ]

    /// Elements that mark the paragraph containing them as HTML.
    private static let inlineTags = [
        "kbd", "sup", "sub", "br", "img", "abbr", "mark", "ins", "del", "s",
        "u", "small", "ruby", "rt", "rp", "bdo", "wbr", "q", "cite", "dfn",
        "var", "samp", "data", "time", "span",
    ]

    /// A fenced code block: an opening run of 3+ backticks or tildes up to a line
    /// consisting of that same run.
    private static let fencedCodeRegex: NSRegularExpression? = try? NSRegularExpression(
        pattern: #"(?m)^(`{3,}|~{3,}).*\n[\s\S]*?^\1[ \t]*$"#,
        options: .anchorsMatchLines
    )

    /// An inline code span delimited by single backticks.
    private static let inlineCodeRegex: NSRegularExpression? = try? NSRegularExpression(
        pattern: #"`[^`]+`"#,
        options: []
    )

    /// A block-level element from `blockTags`, matched case-insensitively.
    private static let blockRegex: NSRegularExpression? = {
        let alternation = blockTags.joined(separator: "|")
        return try? NSRegularExpression(
            pattern: #"(?m)(?:^[ \t]*)?<(\#(alternation))(\s[^>]*)?>[\s\S]*?</\1>[ \t]*(?:\n|$)"#,
            options: [.anchorsMatchLines, .caseInsensitive]
        )
    }()

    /// A blank line (optionally containing spaces/tabs), used as the paragraph separator.
    private static let blankLineRegex: NSRegularExpression? = try? NSRegularExpression(
        pattern: #"\n[ \t]*\n"#,
        options: []
    )

    /// The start of an opening or closing tag for any known inline or block element.
    private static let inlineHTMLPattern: NSRegularExpression? = {
        let alternation = (inlineTags + blockTags).joined(separator: "|")
        return try? NSRegularExpression(
            pattern: #"</?(\#(alternation))[\s>/]"#,
            options: .caseInsensitive
        )
    }()

    // MARK: - Entry point

    /// Splits `text` into `.text` and `.html` segments in document order.
    ///
    /// - Parameter text: Markdown source (mermaid fences are expected to be removed already).
    /// - Returns: `[.text(text)]` when no HTML is detected. Otherwise the HTML ranges as
    ///   whitespace-trimmed `.html` segments, interleaved with the untouched text between
    ///   them; text that is only whitespace is dropped.
    static func extractHTML(from text: String) -> [MarkdownSegment] {
        // Masking preserves UTF-16 length, so ranges found in `masked` apply to `text`.
        let masked = maskCodeBlocks(text)
        let blockRanges = blockHTMLRanges(in: masked)
        let candidates = blockRanges + inlineHTMLParagraphRanges(in: masked, excluding: blockRanges)
        guard !candidates.isEmpty else {
            return [.text(text)]
        }
        return segments(of: text, htmlRanges: mergedRanges(candidates))
    }

    // MARK: - Code masking

    /// Replaces fenced code blocks, then inline code spans, with spaces of equal UTF-16
    /// length so that HTML inside code is invisible to the detection regexes.
    private static func maskCodeBlocks(_ text: String) -> String {
        [fencedCodeRegex, inlineCodeRegex].reduce(text) { current, regex in
            guard let regex else { return current }
            return blankingMatches(of: regex, in: current)
        }
    }

    /// Returns `text` with every match of `regex` overwritten by the same number of spaces.
    private static func blankingMatches(of regex: NSRegularExpression, in text: String) -> String {
        let buffer = NSMutableString(string: text)
        let whole = NSRange(location: 0, length: buffer.length)
        for match in regex.matches(in: text, range: whole) {
            let filler = String(repeating: " ", count: match.range.length)
            buffer.replaceCharacters(in: match.range, with: filler)
        }
        return buffer as String
    }

    // MARK: - Range detection

    /// Ranges of block-level HTML elements found in the masked text.
    private static func blockHTMLRanges(in masked: String) -> [NSRange] {
        guard let regex = blockRegex else { return [] }
        let whole = NSRange(location: 0, length: (masked as NSString).length)
        return regex.matches(in: masked, range: whole).map(\.range)
    }

    /// Ranges of paragraphs that contain a known tag and do not overlap any block range.
    private static func inlineHTMLParagraphRanges(
        in masked: String,
        excluding blockRanges: [NSRange]
    ) -> [NSRange] {
        guard let tagRegex = inlineHTMLPattern else { return [] }
        let source = masked as NSString
        return paragraphRanges(in: masked).filter { paragraph in
            // A paragraph touching a block range is left to the block match.
            if blockRanges.contains(where: { overlap($0, paragraph) }) {
                return false
            }
            let paragraphText = source.substring(with: paragraph)
            let span = NSRange(location: 0, length: (paragraphText as NSString).length)
            return tagRegex.firstMatch(in: paragraphText, range: span) != nil
        }
    }

    /// Ranges of the non-empty runs of text separated by blank lines.
    private static func paragraphRanges(in text: String) -> [NSRange] {
        guard let separator = blankLineRegex else { return [] }
        let length = (text as NSString).length
        var ranges: [NSRange] = []
        var start = 0
        for gap in separator.matches(in: text, range: NSRange(location: 0, length: length)) {
            if gap.range.location > start {
                ranges.append(NSRange(location: start, length: gap.range.location - start))
            }
            start = NSMaxRange(gap.range)
        }
        if start < length {
            ranges.append(NSRange(location: start, length: length - start))
        }
        return ranges
    }

    /// Sorts `ranges` by location and fuses ranges that overlap or touch.
    private static func mergedRanges(_ ranges: [NSRange]) -> [NSRange] {
        ranges
            .sorted { $0.location < $1.location }
            .reduce(into: [NSRange]()) { merged, next in
                guard let previous = merged.last, next.location <= NSMaxRange(previous) else {
                    merged.append(next)
                    return
                }
                let end = max(NSMaxRange(previous), NSMaxRange(next))
                merged[merged.count - 1] = NSRange(
                    location: previous.location,
                    length: end - previous.location
                )
            }
    }

    // MARK: - Segment assembly

    /// Cuts the original `text` at `htmlRanges` (sorted, non-overlapping) into segments.
    private static func segments(of text: String, htmlRanges: [NSRange]) -> [MarkdownSegment] {
        let source = text as NSString
        var result: [MarkdownSegment] = []
        var cursor = 0

        // Text is kept verbatim, but skipped entirely when it is only whitespace.
        func appendTextIfVisible(_ candidate: String) {
            guard !candidate.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { return }
            result.append(.text(candidate))
        }

        for range in htmlRanges {
            if range.location > cursor {
                let gap = NSRange(location: cursor, length: range.location - cursor)
                appendTextIfVisible(source.substring(with: gap))
            }
            let html = source.substring(with: range)
                .trimmingCharacters(in: .whitespacesAndNewlines)
            if !html.isEmpty {
                result.append(.html(html))
            }
            cursor = NSMaxRange(range)
        }
        if cursor < source.length {
            appendTextIfVisible(source.substring(from: cursor))
        }
        return result
    }

    /// Whether the two ranges share at least one position.
    private static func overlap(_ rangeA: NSRange, _ rangeB: NSRange) -> Bool {
        NSMaxRange(rangeB) > rangeA.location && NSMaxRange(rangeA) > rangeB.location
    }
}

View file

@ -0,0 +1,28 @@
# Bundled Web Libraries
#
# These files are loaded at runtime by MermaidWebView and HTMLBlockView
# for fully offline rendering. They are inlined into the HTML string via
# BundledWebResources.swift.
#
# Pinned versions
# mermaid 11.12.3 mermaid.min.js
# marked 15.0.12 marked.min.js
# DOMPurify 3.3.2 purify.min.js
# github-markdown-css 5.9.0 github-markdown-{dark,light}.min.css
#
# How to update
# 1. Check for new releases on each library's GitHub repository.
# 2. Download the new minified files from jsDelivr, pinning the exact version:
#
# curl -Lo mermaid.min.js "https://cdn.jsdelivr.net/npm/mermaid@VERSION/dist/mermaid.min.js"
# curl -Lo marked.min.js "https://cdn.jsdelivr.net/npm/marked@VERSION/marked.min.js"
# curl -Lo purify.min.js "https://cdn.jsdelivr.net/npm/dompurify@VERSION/dist/purify.min.js"
# curl -Lo github-markdown-dark.min.css "https://cdn.jsdelivr.net/npm/github-markdown-css@VERSION/github-markdown-dark.min.css"
# curl -Lo github-markdown-light.min.css "https://cdn.jsdelivr.net/npm/github-markdown-css@VERSION/github-markdown-light.min.css"
#
# 3. Update the pinned versions in this file.
# 4. Build and verify: `just test` passes and rendering works in Simulator.
# 5. For mermaid: confirm the IIFE bundle still exposes `globalThis.mermaid`.
# If the build format changes, MermaidWebView.swift may need adjustment.
# 6. For DOMPurify: review changelog for API changes and verify the sanitizer
# config in HTMLBlockView.swift still works (ADD_TAGS, ADD_ATTR lists).

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

69
Forji/Forji/Resources/Web/marked.min.js vendored Normal file

File diff suppressed because one or more lines are too long

2843
Forji/Forji/Resources/Web/mermaid.min.js vendored Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,213 @@
import SwiftUI
import WebKit
/// Renders a markdown fragment containing raw HTML inside a transparent `WKWebView`.
///
/// The fragment is converted with the bundled `marked` script, sanitized with the bundled
/// DOMPurify script, and styled with the bundled GitHub markdown stylesheet that matches
/// the current color scheme. The page posts its content height to the `heightReporter`
/// script message handler; the value is written to `height`.
struct HTMLBlockView: UIViewRepresentable {
    private static let markedJS = BundledWebResources.contents(of: "marked.min.js")
    private static let purifyJS = BundledWebResources.contents(of: "purify.min.js")
    private static let darkCSS = BundledWebResources.contents(of: "github-markdown-dark.min.css")
    private static let lightCSS = BundledWebResources.contents(of: "github-markdown-light.min.css")

    /// Markdown (with embedded HTML) to render.
    let markdown: String
    /// Receives the content height reported by the page.
    @Binding var height: CGFloat
    /// URL the fragment was loaded from; used to resolve relative image paths.
    var baseURL: URL?
    /// Invoked with a repository-relative path when an activated link maps to one.
    var onNavigateToFile: ((String) -> Void)?
    @Environment(\.colorScheme) private var colorScheme

    func makeCoordinator() -> Coordinator {
        Coordinator(parent: self)
    }

    func makeUIView(context: Context) -> WKWebView {
        let config = WKWebViewConfiguration()
        let controller = WKUserContentController()
        controller.add(context.coordinator, name: "heightReporter")
        config.userContentController = controller

        let webView = WKWebView(frame: .zero, configuration: config)
        webView.isOpaque = false
        webView.backgroundColor = .clear
        webView.scrollView.backgroundColor = .clear
        webView.scrollView.isScrollEnabled = true
        webView.scrollView.bounces = false
        webView.scrollView.showsVerticalScrollIndicator = false
        webView.navigationDelegate = context.coordinator
        return webView
    }

    /// Rebuilds and loads the HTML page when the markdown, theme or base URL changed.
    // swiftlint:disable:next function_body_length
    func updateUIView(_ webView: WKWebView, context: Context) {
        // The coordinator is created once, but SwiftUI recreates this struct on every
        // update. Refresh its copy so callbacks use the current closure and binding.
        context.coordinator.parent = self

        let theme = colorScheme == .dark ? "dark" : "light"
        // Use the raw/ endpoint so relative image paths resolve to actual files.
        let rawBaseURL = baseURL.flatMap(Self.rawURL(for:))
        let rawBase = rawBaseURL?.absoluteString ?? ""

        // The base URL is part of the key: the page content depends on it.
        let key = "\(markdown)\n---\n\(theme)\n---\n\(rawBase)"
        guard key != context.coordinator.lastRenderedKey else { return }
        context.coordinator.lastRenderedKey = key

        let markdownLiteral = Self.javaScriptLiteral(for: markdown)
        let rawBaseLiteral = Self.javaScriptLiteral(for: rawBase)
        let markdownCSS = theme == "dark" ? Self.darkCSS : Self.lightCSS
        let markedJS = Self.markedJS
        let purifyJS = Self.purifyJS
        // swiftlint:disable:next line_length
        let csp = "default-src 'none'; script-src 'unsafe-inline'; style-src 'unsafe-inline'; img-src * data: blob:; connect-src *;"
        let html = """
        <!DOCTYPE html>
        <html data-color-mode="\(theme)" data-\(theme)-theme="\(theme)">
        <head>
        <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">
        <meta http-equiv="Content-Security-Policy" content="\(csp)">
        <style>\(markdownCSS)</style>
        <style>
        body {
        margin: 0;
        padding: 0;
        background: transparent;
        }
        .markdown-body {
        background: transparent;
        font-size: 15px;
        line-height: 1.5;
        }
        .table-wrapper {
        overflow-x: auto;
        -webkit-overflow-scrolling: touch;
        }
        .markdown-body table {
        display: table;
        min-width: 100%;
        }
        .markdown-body img {
        max-width: 100%;
        }
        </style>
        </head>
        <body>
        <div class="markdown-body" id="content"></div>
        <script>\(markedJS)</script>
        <script>\(purifyJS)</script>
        <script>
        const raw = \(markdownLiteral);
        const rendered = DOMPurify.sanitize(marked.parse(raw), {
        ADD_TAGS: ['details', 'summary', 'picture', 'source', 'figure',
        'figcaption', 'dl', 'dt', 'dd', 'center', 'section'],
        ADD_ATTR: ['align', 'width', 'height', 'srcset', 'media', 'loading']
        });
        const el = document.getElementById('content');
        el.innerHTML = rendered;
        // Rewrite relative image URLs to absolute raw URLs
        const rawBase = \(rawBaseLiteral);
        if (rawBase) {
        const base = rawBase.endsWith('/') ? rawBase : rawBase + '/';
        function resolveUrl(src) {
        if (!src || src.startsWith('http') || src.startsWith('data:')
        || src.startsWith('blob:') || src.startsWith('/')) return src;
        return base + src;
        }
        // Only images that carry a src attribute; assigning null would produce src="null".
        el.querySelectorAll('img[src]').forEach(img => {
        img.src = resolveUrl(img.getAttribute('src'));
        });
        el.querySelectorAll('source').forEach(source => {
        const srcset = source.getAttribute('srcset');
        if (srcset) {
        source.srcset = srcset.split(',').map(entry => {
        const parts = entry.trim().split(/\\s+/);
        parts[0] = resolveUrl(parts[0]);
        return parts.join(' ');
        }).join(', ');
        }
        });
        }
        // Wrap tables for horizontal scrolling
        el.querySelectorAll('table').forEach(table => {
        const wrapper = document.createElement('div');
        wrapper.className = 'table-wrapper';
        table.parentNode.insertBefore(wrapper, table);
        wrapper.appendChild(table);
        });
        function reportHeight() {
        const h = document.body.scrollHeight;
        if (h > 0) {
        window.webkit.messageHandlers.heightReporter.postMessage(h);
        }
        }
        const observer = new ResizeObserver(() => reportHeight());
        observer.observe(document.getElementById('content'));
        setTimeout(reportHeight, 500);
        </script>
        </body>
        </html>
        """
        webView.loadHTMLString(html, baseURL: rawBaseURL)
    }

    /// Unregisters the script message handler that `makeUIView` added.
    static func dismantleUIView(_ webView: WKWebView, coordinator _: Coordinator) {
        webView.configuration.userContentController.removeScriptMessageHandler(
            forName: "heightReporter",
        )
    }

    /// Maps a `/src/branch/` URL to its `/raw/branch/` counterpart.
    ///
    /// - Returns: `url` unchanged when it has no `/src/branch/` component, or `nil` when
    ///   the rewritten string is not a valid URL.
    private static func rawURL(for url: URL) -> URL? {
        let absolute = url.absoluteString
        guard let range = absolute.range(of: "/src/branch/") else { return url }
        return URL(string: absolute.replacingCharacters(in: range, with: "/raw/branch/"))
    }

    /// Encodes `value` as a JavaScript string literal for use inside an inline `<script>`.
    ///
    /// The value is JSON-encoded, then every `<` is written as `\u003C`. The emitted
    /// literal therefore cannot contain `</script>` or `<!--`, which the HTML parser
    /// treats specially inside script elements. Falls back to an empty string literal
    /// when encoding fails.
    private static func javaScriptLiteral(for value: String) -> String {
        guard
            let data = try? JSONSerialization.data(withJSONObject: value, options: .fragmentsAllowed),
            let json = String(data: data, encoding: .utf8)
        else {
            return "\"\""
        }
        return json.replacingOccurrences(of: "<", with: "\\u003C")
    }

    /// Receives height reports from the page and routes link activations.
    final class Coordinator: NSObject, WKScriptMessageHandler, WKNavigationDelegate {
        /// Latest `HTMLBlockView` value; refreshed on every `updateUIView`.
        var parent: HTMLBlockView
        /// Key of the content currently loaded, used to skip redundant reloads.
        var lastRenderedKey: String?

        init(parent: HTMLBlockView) {
            self.parent = parent
        }

        /// Stores a positive height posted by the page into the parent's binding.
        func userContentController(
            _: WKUserContentController, didReceive message: WKScriptMessage,
        ) {
            guard let height = message.body as? CGFloat, height > 0 else { return }
            // Deferred to the next main-queue turn rather than applied inside the callback.
            DispatchQueue.main.async { [weak self] in
                self?.parent.height = height
            }
        }

        /// Cancels link activations and hands them to the app; all other navigations load.
        // swiftlint:disable:next line_length
        func webView(_: WKWebView, decidePolicyFor navigationAction: WKNavigationAction, decisionHandler: @escaping (WKNavigationActionPolicy) -> Void) {
            guard navigationAction.navigationType == .linkActivated,
                  let url = navigationAction.request.url
            else {
                decisionHandler(.allow)
                return
            }
            decisionHandler(.cancel)
            if let onNavigateToFile = parent.onNavigateToFile,
               let path = repoRelativePath(from: url)
            {
                onNavigateToFile(path)
            } else {
                UIApplication.shared.open(url)
            }
        }
    }
}

View file

@ -164,6 +164,22 @@ struct SettingsTabView: View {
"HighlightSwift (MIT)",
destination: URL(string: "https://github.com/appstefan/HighlightSwift")!,
)
Link(
"mermaid (MIT)",
destination: URL(string: "https://github.com/mermaid-js/mermaid")!,
)
Link(
"marked (MIT)",
destination: URL(string: "https://github.com/markedjs/marked")!,
)
Link(
"DOMPurify (Apache 2.0/MPL 2.0)",
destination: URL(string: "https://github.com/cure53/DOMPurify")!,
)
Link(
"github-markdown-css (MIT)",
destination: URL(string: "https://github.com/sindresorhus/github-markdown-css")!,
)
}
.font(.caption)
.foregroundStyle(.secondary)

View file

@ -123,9 +123,14 @@ struct MarkdownPreview: View {
var body: some View {
let segments = MermaidParser.parse(text)
let hasMermaid = segments.contains { if case .mermaid = $0 { true } else { false } }
let hasSpecialSegments = segments.contains {
switch $0 {
case .mermaid, .html: true
case .text: false
}
}
if hasMermaid {
if hasSpecialSegments {
VStack(alignment: .leading, spacing: 12) {
ForEach(Array(segments.enumerated()), id: \.offset) { _, segment in
switch segment {
@ -144,6 +149,12 @@ struct MarkdownPreview: View {
.textual.textSelection(.enabled)
case let .mermaid(code):
MermaidDiagramView(code: code)
case let .html(content):
HTMLBlockDiagramView(
markdown: content,
baseURL: baseURL,
onNavigateToFile: onNavigateToFile,
)
}
}
}
@ -205,6 +216,23 @@ struct MermaidDiagramView: View {
}
}
/// Hosts an `HTMLBlockView` and sizes it to the height that view writes back.
struct HTMLBlockDiagramView: View {
    /// Markdown (with embedded HTML) forwarded to `HTMLBlockView`.
    let markdown: String
    /// Forwarded to `HTMLBlockView`.
    var baseURL: URL?
    /// Forwarded to `HTMLBlockView`.
    var onNavigateToFile: ((String) -> Void)?
    /// Frame height; stays at 100 until `HTMLBlockView` updates it through the binding.
    @State private var height: CGFloat = 100

    var body: some View {
        let webBlock = HTMLBlockView(
            markdown: markdown,
            height: $height,
            baseURL: baseURL,
            onNavigateToFile: onNavigateToFile
        )
        webBlock.frame(height: height)
    }
}
#if DEBUG
#Preview("Editor") {
@Previewable @State var text = "# Hello\n\nThis is **bold** and *italic* text.\n\n- Item 1\n- Item 2"

View file

@ -2,6 +2,8 @@ import SwiftUI
import WebKit
struct MermaidWebView: UIViewRepresentable {
private static let mermaidJS = BundledWebResources.contents(of: "mermaid.min.js")
let code: String
@Binding var height: CGFloat
@Environment(\.colorScheme) private var colorScheme
@ -39,8 +41,9 @@ struct MermaidWebView: UIViewRepresentable {
.replacingOccurrences(of: "\"", with: "&quot;")
.replacingOccurrences(of: "'", with: "&#39;")
// swiftlint:disable:next line_length
let csp = "default-src 'none'; script-src https://cdn.jsdelivr.net 'unsafe-inline'; style-src 'unsafe-inline'; img-src data:;"
let mermaidJS = Self.mermaidJS
let csp =
"default-src 'none'; script-src 'unsafe-inline'; style-src 'unsafe-inline'; img-src data:;"
let html = """
<!DOCTYPE html>
<html>
@ -68,8 +71,8 @@ struct MermaidWebView: UIViewRepresentable {
<pre class="mermaid">
\(safeCode)
</pre>
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
<script>\(mermaidJS)</script>
<script>
mermaid.initialize({
startOnLoad: true,
theme: '\(theme)',

View file

@ -0,0 +1,336 @@
import Foundation
import Testing
@testable import Forji
/// Tests for HTML detection in `MermaidParser.parse`.
///
/// Segment arrays are never indexed directly: `#expect` does not stop a test, so indexing
/// after a failed count expectation would crash the run instead of reporting a failure.
@MainActor
struct HTMLParserTests {
    // MARK: - Helpers

    /// Returns only the `.html` segments produced by parsing `markdown`.
    private func extractedHTML(from markdown: String) -> [MarkdownSegment] {
        MermaidParser.parse(markdown).filter { if case .html = $0 { true } else { false } }
    }

    /// Name of the segment's case, used to compare segment sequences.
    private func kind(of segment: MarkdownSegment) -> String {
        switch segment {
        case .text: "text"
        case .html: "html"
        case .mermaid: "mermaid"
        }
    }

    // MARK: - Block-level extraction

    @Test func detailsBlockExtracted() {
        let markdown = """
        # Title
        Some text.
        <details>
        <summary>Click me</summary>
        Hidden content here.
        </details>
        More text.
        """
        let html = extractedHTML(from: markdown)
        #expect(html.count == 1)
        if case .html(let content)? = html.first {
            #expect(content.contains("<details>"))
            #expect(content.contains("</details>"))
        }
    }

    @Test func tableBlockExtracted() {
        let markdown = """
        Before table.
        <table>
        <tr><th>Header</th></tr>
        <tr><td>Cell</td></tr>
        </table>
        After table.
        """
        let html = extractedHTML(from: markdown)
        #expect(html.count == 1)
        if case .html(let content)? = html.first {
            #expect(content.contains("<table>"))
            #expect(content.contains("</table>"))
        }
    }

    @Test func divBlockExtracted() {
        let markdown = """
        Text.
        <div align="center">
        Content
        </div>
        More.
        """
        #expect(extractedHTML(from: markdown).count == 1)
    }

    @Test func pictureBlockExtracted() {
        let markdown = """
        Text.
        <picture>
        <source media="(prefers-color-scheme: dark)" srcset="dark.png">
        <img src="light.png">
        </picture>
        More.
        """
        #expect(extractedHTML(from: markdown).count == 1)
    }

    // MARK: - Inline HTML extraction

    @Test func kbdInParagraphExtracted() {
        let markdown = """
        Some text.
        Press <kbd>Ctrl</kbd>+<kbd>C</kbd> to copy.
        More text.
        """
        let html = extractedHTML(from: markdown)
        #expect(html.count == 1)
        if case .html(let content)? = html.first {
            #expect(content.contains("<kbd>"))
        }
    }

    @Test func supSubInParagraphExtracted() {
        let markdown = """
        Normal paragraph.
        Water is H<sub>2</sub>O and E=mc<sup>2</sup>.
        Another normal paragraph.
        """
        #expect(extractedHTML(from: markdown).count == 1)
    }

    @Test func standaloneImgExtracted() {
        let markdown = """
        Text before.
        <img src="image.png" alt="test" width="200">
        Text after.
        """
        #expect(extractedHTML(from: markdown).count == 1)
    }

    // MARK: - No false positives

    @Test func pureMarkdownRemainsText() {
        let markdown = """
        # Hello
        This is **bold** and *italic* text.
        - Item 1
        - Item 2
        [A link](https://example.com)
        """
        let segments = MermaidParser.parse(markdown)
        #expect(segments.count == 1)
        if case .text? = segments.first {} else {
            Issue.record("Expected .text segment")
        }
    }

    @Test func angleBracketsInMathNotExtracted() {
        let markdown = "If x < 10 and y > 5 then do something."
        let segments = MermaidParser.parse(markdown)
        #expect(segments.count == 1)
        if case .text? = segments.first {} else {
            Issue.record("Expected .text segment for math angle brackets")
        }
    }

    @Test func htmlInsideFencedCodeBlockNotExtracted() {
        let markdown = """
        Some text.
        ```html
        <details>
        <summary>Example</summary>
        This is code.
        </details>
        ```
        More text.
        """
        #expect(
            extractedHTML(from: markdown).isEmpty,
            "HTML inside fenced code blocks should not be extracted"
        )
    }

    @Test func htmlInsideInlineCodeNotExtracted() {
        let markdown = "Use `<details>` tag for collapsible sections and `<kbd>` for keyboard keys."
        #expect(
            extractedHTML(from: markdown).isEmpty,
            "HTML inside inline code should not be extracted"
        )
    }

    @Test func markdownTablesRemainText() {
        let markdown = """
        | Header 1 | Header 2 |
        |----------|----------|
        | Cell 1 | Cell 2 |
        """
        #expect(extractedHTML(from: markdown).isEmpty, "Markdown tables should remain as .text")
    }

    // MARK: - Mixed content

    @Test func markdownPlusBlockHTMLPlusMermaid() {
        let markdown = """
        # Title
        Regular paragraph.
        <details>
        <summary>Expand</summary>
        Hidden content.
        </details>
        ```mermaid
        graph TD
        A --> B
        ```
        Final text.
        """
        let kinds = Set(MermaidParser.parse(markdown).map { kind(of: $0) })
        #expect(kinds.contains("text"), "Should have text segments")
        #expect(kinds.contains("html"), "Should have html segments")
        #expect(kinds.contains("mermaid"), "Should have mermaid segments")
    }

    // MARK: - Edge cases

    @Test func emptyStringProducesTextSegment() {
        let segments = MermaidParser.parse("")
        #expect(segments == [.text("")])
    }

    @Test func nestedTagsExtractedAsOneBlock() {
        let markdown = """
        Text.
        <details>
        <summary>Outer</summary>
        <div>
        <table>
        <tr><td>Nested</td></tr>
        </table>
        </div>
        </details>
        More.
        """
        #expect(
            extractedHTML(from: markdown).count == 1,
            "Nested tags should be extracted as a single block"
        )
    }

    @Test func selfClosingTagsExtracted() {
        let markdown = """
        Text before.
        <img src="test.png" />
        Text after.
        """
        #expect(extractedHTML(from: markdown).count == 1)
    }

    // MARK: - Security: dangerous HTML is segmented as .html (so DOMPurify can sanitize it)

    @Test func scriptTagSegmentedAsHTML() {
        let markdown = """
        Safe text.
        <div><script>alert("xss")</script></div>
        More safe text.
        """
        #expect(
            !extractedHTML(from: markdown).isEmpty,
            "Script inside block tag should be segmented as .html for sanitization"
        )
    }

    @Test func imgWithOnerrorSegmentedAsHTML() {
        let markdown = """
        Text.
        <img src="x" onerror="alert('xss')">
        More text.
        """
        #expect(
            !extractedHTML(from: markdown).isEmpty,
            "img with onerror should be segmented as .html for sanitization"
        )
    }

    @Test func dangerousInlineAttributesSegmentedAsHTML() {
        let markdown = """
        Normal paragraph.
        <div onclick="alert('xss')">Click me</div>
        Another paragraph.
        """
        #expect(
            !extractedHTML(from: markdown).isEmpty,
            "div with onclick should be segmented as .html for sanitization"
        )
    }

    // MARK: - Segment ordering

    @Test func segmentOrderPreserved() {
        let markdown = """
        First paragraph.
        <table>
        <tr><td>Cell</td></tr>
        </table>
        Middle paragraph.
        <details>
        <summary>Details</summary>
        Content.
        </details>
        Last paragraph.
        """
        let segments = MermaidParser.parse(markdown)
        let expectedKinds = ["text", "html", "text", "html", "text"]
        #expect(segments.count == expectedKinds.count)
        // zip stops at the shorter sequence, so a wrong count cannot index out of range.
        for (index, pair) in zip(segments, expectedKinds).enumerated() where kind(of: pair.0) != pair.1 {
            Issue.record("Expected .\(pair.1) at \(index)")
        }
    }
}

View file

@ -86,6 +86,52 @@ final class RepositoryUITests: ForgejoReadOnlyUITestBase {
super.tearDown()
}
// MARK: - HTML in Markdown Rendering
/// Opens the `html-readme-repo` detail screen and checks that no static text element
/// is labelled with a raw HTML tag, i.e. the tags are not shown as plain text.
@MainActor
func testHTMLInMarkdownRendering() throws {
navigateToRepoDetail("html-readme-repo")
// The repository title appearing signals that the detail screen has loaded.
let repoTitle = app.staticTexts["html-readme-repo"]
XCTAssertTrue(repoTitle.waitForExistence(timeout: 10))
// Each entry: a raw tag that must not show up as text, how long to wait for it, and the failure message.
let rawTagChecks: [(label: String, timeout: TimeInterval, message: String)] = [
("<details>", 5, "Raw <details> tag should not be visible as plain text"),
("<table>", 2, "Raw <table> tag should not be visible as plain text"),
("<kbd>", 2, "Raw <kbd> tag should not be visible as plain text"),
]
for check in rawTagChecks {
let appeared = app.staticTexts[check.label].waitForExistence(timeout: check.timeout)
XCTAssertFalse(appeared, check.message)
}
}
// MARK: - HTML Sanitization
/// Opens the `html-readme-repo` detail screen and checks that no static text element
/// is labelled with the script body, an iframe tag or an `onclick` attribute.
@MainActor
func testDangerousHTMLIsStripped() throws {
navigateToRepoDetail("html-readme-repo")
let repoTitle = app.staticTexts["html-readme-repo"]
XCTAssertTrue(repoTitle.waitForExistence(timeout: 10))
// Each entry: text that must not be visible, how long to wait for it, and the failure message.
// Only absence is asserted; this test does not check that the safe content renders.
let forbiddenTexts: [(label: String, timeout: TimeInterval, message: String)] = [
("alert(\"xss\")", 5, "Script content should be stripped by DOMPurify"),
("<iframe", 2, "Iframe tags should be stripped by DOMPurify"),
("onclick", 2, "onclick attributes should be stripped by DOMPurify"),
]
for check in forbiddenTexts {
let appeared = app.staticTexts[check.label].waitForExistence(timeout: check.timeout)
XCTAssertFalse(appeared, check.message)
}
}
// MARK: - Branch Selector
@MainActor

View file

@ -107,6 +107,10 @@ The Forji logo is based on the [Forgejo logo](https://codeberg.org/forgejo/forge
- [ForgejoKit](https://codeberg.org/secana/ForgejoKit) — Forgejo API client (MIT)
- [Textual](https://github.com/gonzalezreal/textual) — Markdown rendering (MIT)
- [HighlightSwift](https://github.com/appstefan/HighlightSwift) — Code syntax highlighting (MIT)
- [mermaid](https://github.com/mermaid-js/mermaid) — Diagram rendering (MIT)
- [marked](https://github.com/markedjs/marked) — Markdown parser (MIT)
- [DOMPurify](https://github.com/cure53/DOMPurify) — HTML sanitizer (Apache 2.0 / MPL 2.0)
- [github-markdown-css](https://github.com/sindresorhus/github-markdown-css) — GitHub-style Markdown styling (MIT)
## Contributing

View file

@ -52,6 +52,11 @@ curl -sf -X POST "$BASE_URL/api/v1/user/repos" \
-H "Content-Type: application/json" \
-d '{"name": "archived-repo", "description": "An archived repository", "private": false, "auto_init": true}' &
# Create the public, auto-initialised repository "html-readme-repo".
# The request is started in the background (trailing &).
curl -sf -X POST "$BASE_URL/api/v1/user/repos" \
-u "$ADMIN_AUTH" \
-H "Content-Type: application/json" \
-d '{"name": "html-readme-repo", "description": "Repo with HTML in README", "private": false, "auto_init": true}' &
wait
echo "Repositories created."
@ -62,6 +67,59 @@ curl -sf -X PATCH "$BASE_URL/api/v1/repos/$ADMIN_USER/archived-repo" \
-H "Content-Type: application/json" \
-d '{"archived": true}'
# Update html-readme-repo README with HTML content
echo "Updating html-readme-repo README..."
# Read the "sha" field of the existing README.md from the contents API;
# it is placed in the update payload as the "sha" value.
README_SHA=$(curl -sf "$BASE_URL/api/v1/repos/$ADMIN_USER/html-readme-repo/contents/README.md" \
-u "$ADMIN_AUTH" | python3 -c "import sys,json; print(json.load(sys.stdin)['sha'])")
# Build the JSON body for the README update: the new README text base64-encoded as
# "content", a commit "message", and the current file "sha" ($README_SHA, expanded by
# the shell).
#
# The Python program sits inside a double-quoted shell string, so every literal double
# quote in the README text must be written as \" - an unescaped one is consumed by the
# shell and never reaches Python.
README_PAYLOAD=$(python3 -c "
import base64, json
content = '''# HTML Readme Test
Regular markdown paragraph.
<details>
<summary>Click to expand</summary>
This content is hidden by default.
- Item A
- Item B
</details>
## Table
<table>
<tr><th>Name</th><th>Value</th></tr>
<tr><td>Alpha</td><td>1</td></tr>
<tr><td>Beta</td><td>2</td></tr>
</table>
## Keyboard Shortcuts
Press <kbd>Ctrl</kbd>+<kbd>C</kbd> to copy.
## Image
<img src=\"https://via.placeholder.com/150\" alt=\"placeholder\" width=\"150\">
## Security Test
<script>alert(\"xss\")</script>
<img src=\"x\" onerror=\"alert('xss')\">
<iframe src=\"https://evil.example.com\"></iframe>
<div onclick=\"alert('xss')\">Click me</div>
'''
b64 = base64.b64encode(content.encode()).decode()
print(json.dumps({'content': b64, 'message': 'Add HTML to README', 'sha': '$README_SHA'}))
")
# Send the payload to the contents API to replace README.md in html-readme-repo.
curl -sf -X PUT "$BASE_URL/api/v1/repos/$ADMIN_USER/html-readme-repo/contents/README.md" \
-u "$ADMIN_AUTH" \
-H "Content-Type: application/json" \
-d "$README_PAYLOAD"
# --- Phase 3: Create issues + code files + pagination issues (parallel) ---
# Create 25 pagination issues sequentially in a background subshell