From 8a1d8f719cc0f618d771400602afbc4e3222caf2 Mon Sep 17 00:00:00 2001 From: Ryan DeStefano <67760716+rdestefa@users.noreply.github.com> Date: Sat, 2 May 2026 02:05:40 -0700 Subject: [PATCH] Initial Attempt at Supporting Overriding Core Delimiter Processors --- README.md | 3 + .../commonmark/internal/DocumentParser.java | 9 +- .../internal/InlineParserContextImpl.java | 8 ++ .../commonmark/internal/InlineParserImpl.java | 42 +++++++- .../internal/StaggeredDelimiterProcessor.java | 13 +++ .../parser/InlineParserContext.java | 6 ++ .../java/org/commonmark/parser/Parser.java | 26 ++++- .../test/InlineParserContextTest.java | 5 + .../java/org/commonmark/test/ParserTest.java | 101 +++++++++++++++++- 9 files changed, 204 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 845226729..86e13fc55 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,9 @@ all of them via methods on `Parser.Builder` - Parsing of inline content can be extended/overridden with `customInlineContentParserFactory` - Parsing of [delimiters](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis) in inline content can be extended with `customDelimiterProcessor` +- Core built-in delimiter parsing can be replaced with `overrideDelimiterProcessor` + to support alternate emphasis syntax such as single `_` meaning emphasis and + single `*` meaning strong emphasis. 
- Processing of links can be customized with `linkProcessor` and `linkMarker` #### Thread-safety diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index 07d97296b..536038ad6 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -73,6 +73,7 @@ public class DocumentParser implements ParserState { private final InlineParserFactory inlineParserFactory; private final List inlineContentParserFactories; private final List delimiterProcessors; + private final List overrideDelimiterProcessors; private final List linkProcessors; private final Set linkMarkers; private final IncludeSourceSpans includeSourceSpans; @@ -85,12 +86,13 @@ public class DocumentParser implements ParserState { public DocumentParser(List blockParserFactories, InlineParserFactory inlineParserFactory, List inlineContentParserFactories, List delimiterProcessors, - List linkProcessors, Set linkMarkers, - IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) { + List overrideDelimiterProcessors, List linkProcessors, + Set linkMarkers, IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) { this.blockParserFactories = blockParserFactories; this.inlineParserFactory = inlineParserFactory; this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; + this.overrideDelimiterProcessors = overrideDelimiterProcessors; this.linkProcessors = linkProcessors; this.linkMarkers = linkMarkers; this.includeSourceSpans = includeSourceSpans; @@ -481,7 +483,8 @@ private BlockStartImpl findBlockStart(BlockParser blockParser) { * Walk through a block & children recursively, parsing string content into inline content where appropriate. 
*/ private void processInlines() { - var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, definitions); + var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, + overrideDelimiterProcessors, linkProcessors, linkMarkers, definitions); var inlineParser = inlineParserFactory.create(context); for (var blockParser : allBlockParsers) { diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java index 233041f62..9cda4d985 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java @@ -13,17 +13,20 @@ public class InlineParserContextImpl implements InlineParserContext { private final List inlineContentParserFactories; private final List delimiterProcessors; + private final List overrideDelimiterProcessors; private final List linkProcessors; private final Set linkMarkers; private final Definitions definitions; public InlineParserContextImpl(List inlineContentParserFactories, List delimiterProcessors, + List overrideDelimiterProcessors, List linkProcessors, Set linkMarkers, Definitions definitions) { this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; + this.overrideDelimiterProcessors = overrideDelimiterProcessors; this.linkProcessors = linkProcessors; this.linkMarkers = linkMarkers; this.definitions = definitions; @@ -39,6 +42,11 @@ public List getCustomDelimiterProcessors() { return delimiterProcessors; } + @Override + public List getOverrideDelimiterProcessors() { + return overrideDelimiterProcessors; + } + @Override public List getCustomLinkProcessors() { return linkProcessors; diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java 
b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 44422f421..b00579d80 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -42,7 +42,7 @@ public class InlineParserImpl implements InlineParser, InlineParserState { public InlineParserImpl(InlineParserContext context) { this.context = context; this.inlineContentParserFactories = calculateInlineContentParserFactories(context.getCustomInlineContentParserFactories()); - this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors()); + this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors(), context.getOverrideDelimiterProcessors()); this.linkProcessors = calculateLinkProcessors(context.getCustomLinkProcessors()); this.linkMarkers = calculateLinkMarkers(context.getCustomLinkMarkers()); this.specialCharacters = calculateSpecialCharacters(linkMarkers, this.delimiterProcessors.keySet(), this.inlineContentParserFactories); @@ -66,13 +66,51 @@ private List calculateLinkProcessors(List linkProc return list; } - private static Map calculateDelimiterProcessors(List delimiterProcessors) { + private static Map calculateDelimiterProcessors(List delimiterProcessors, + List overrideDelimiterProcessors) { var map = new HashMap(); addDelimiterProcessors(List.of(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); + addOverrideDelimiterProcessors(overrideDelimiterProcessors, map); addDelimiterProcessors(delimiterProcessors, map); return map; } + private static void addOverrideDelimiterProcessors(Iterable delimiterProcessors, + Map map) { + for (DelimiterProcessor delimiterProcessor : delimiterProcessors) { + char opening = delimiterProcessor.getOpeningCharacter(); + char closing = delimiterProcessor.getClosingCharacter(); + if (opening == closing) { + replaceOrAddDelimiterProcessorForChar(opening, 
delimiterProcessor, map); + } else { + replaceOrAddDelimiterProcessorForChar(opening, delimiterProcessor, map); + replaceOrAddDelimiterProcessorForChar(closing, delimiterProcessor, map); + } + } + } + + private static void replaceOrAddDelimiterProcessorForChar(char delimiterChar, + DelimiterProcessor delimiterProcessor, + Map delimiterProcessors) { + DelimiterProcessor existing = delimiterProcessors.get(delimiterChar); + if (existing == null) { + delimiterProcessors.put(delimiterChar, delimiterProcessor); + return; + } + if (existing instanceof StaggeredDelimiterProcessor) { + ((StaggeredDelimiterProcessor) existing).replace(delimiterProcessor); + return; + } + if (existing.getMinLength() == delimiterProcessor.getMinLength()) { + delimiterProcessors.put(delimiterChar, delimiterProcessor); + return; + } + StaggeredDelimiterProcessor staggered = new StaggeredDelimiterProcessor(delimiterChar); + staggered.add(existing); + staggered.add(delimiterProcessor); + delimiterProcessors.put(delimiterChar, staggered); + } + private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { for (DelimiterProcessor delimiterProcessor : delimiterProcessors) { char opening = delimiterProcessor.getOpeningCharacter(); diff --git a/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java index 2836e346a..327290fb0 100644 --- a/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java @@ -60,6 +60,19 @@ void add(DelimiterProcessor dp) { } } + void replace(DelimiterProcessor dp) { + final int len = dp.getMinLength(); + ListIterator it = processors.listIterator(); + while (it.hasNext()) { + DelimiterProcessor p = it.next(); + if (p.getMinLength() == len) { + it.set(dp); + return; + } + } + add(dp); + } + private DelimiterProcessor findProcessor(int len) 
{ for (DelimiterProcessor p : processors) { if (p.getMinLength() <= len) { diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index 12007610b..a56584e1a 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -25,6 +25,12 @@ public interface InlineParserContext { */ List getCustomDelimiterProcessors(); + /** + * @return delimiter processors that have been configured with + * {@link Parser.Builder#overrideDelimiterProcessor(DelimiterProcessor)} + */ + List getOverrideDelimiterProcessors(); + /** * @return custom link processors that have been configured with {@link Parser.Builder#linkProcessor}. */ diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 8faac789b..02c2b50e8 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -17,7 +17,6 @@ import java.io.Reader; import java.util.*; - /** * Parses input text to a tree of nodes. *
<p>
@@ -32,6 +31,7 @@ public class Parser { private final List blockParserFactories; private final List inlineContentParserFactories; private final List delimiterProcessors; + private final List overrideDelimiterProcessors; private final List linkProcessors; private final Set linkMarkers; private final InlineParserFactory inlineParserFactory; @@ -45,6 +45,7 @@ private Parser(Builder builder) { this.postProcessors = builder.postProcessors; this.inlineContentParserFactories = builder.inlineContentParserFactories; this.delimiterProcessors = builder.delimiterProcessors; + this.overrideDelimiterProcessors = builder.overrideDelimiterProcessors; this.linkProcessors = builder.linkProcessors; this.linkMarkers = builder.linkMarkers; this.includeSourceSpans = builder.includeSourceSpans; @@ -53,7 +54,8 @@ private Parser(Builder builder) { // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. var context = new InlineParserContextImpl( - inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, new Definitions()); + inlineContentParserFactories, delimiterProcessors, overrideDelimiterProcessors, + linkProcessors, linkMarkers, new Definitions()); this.inlineParserFactory.create(context); } @@ -108,7 +110,7 @@ public Node parseReader(Reader input) throws IOException { private DocumentParser createDocumentParser() { return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories, - delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers); + delimiterProcessors, overrideDelimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers); } private Node postProcess(Node document) { @@ -125,6 +127,7 @@ public static class Builder { private final List blockParserFactories = new ArrayList<>(); private final List inlineContentParserFactories = new ArrayList<>(); private final List 
delimiterProcessors = new ArrayList<>(); + private final List overrideDelimiterProcessors = new ArrayList<>(); private final List linkProcessors = new ArrayList<>(); private final List postProcessors = new ArrayList<>(); private final Set linkMarkers = new HashSet<>(); @@ -273,6 +276,23 @@ public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) { return this; } + /** + * Add a delimiter processor that replaces the built-in processor for the same delimiter character. + *
<p>
+ * This can be used to override core syntax such as emphasis and strong emphasis parsing. + * The built-in processor for the same delimiter character and minimum length is replaced. + * If a processor with the same delimiter character but a different minimum length exists, it is combined + * using the standard staggered delimiter processor behavior. + * + * @param delimiterProcessor a delimiter processor implementation + * @return {@code this} + */ + public Builder overrideDelimiterProcessor(DelimiterProcessor delimiterProcessor) { + Objects.requireNonNull(delimiterProcessor, "delimiterProcessor must not be null"); + overrideDelimiterProcessors.add(delimiterProcessor); + return this; + } + /** * Add a custom link/image processor for inline parsing. *
<p>
diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java index c05cac2d2..0a7e4f9e6 100644 --- a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -53,6 +53,11 @@ public List getCustomDelimiterProcessors() { return inlineParserContext.getCustomDelimiterProcessors(); } + @Override + public List getOverrideDelimiterProcessors() { + return inlineParserContext.getOverrideDelimiterProcessors(); + } + @Override public List getCustomLinkProcessors() { return inlineParserContext.getCustomLinkProcessors(); diff --git a/commonmark/src/test/java/org/commonmark/test/ParserTest.java b/commonmark/src/test/java/org/commonmark/test/ParserTest.java index 337196c56..8108f6a7a 100644 --- a/commonmark/src/test/java/org/commonmark/test/ParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/ParserTest.java @@ -2,6 +2,8 @@ import org.commonmark.node.*; import org.commonmark.parser.*; +import org.commonmark.parser.delimiter.DelimiterProcessor; +import org.commonmark.parser.delimiter.DelimiterRun; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.renderer.markdown.MarkdownRenderer; import org.commonmark.testutil.TestResources; @@ -13,7 +15,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; -import java.util.List; import java.util.Set; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -111,6 +112,104 @@ public InlineParser create(InlineParserContext inlineParserContext) { assertThat(parser.parse(input).getFirstChild().getFirstChild()).isInstanceOf(ThematicBreak.class); } + @Test + public void overrideDelimiterProcessorReplacesBuiltInProcessor() { + Parser parser = Parser.builder().overrideDelimiterProcessor(new DelimiterProcessor() { + @Override + public char 
getOpeningCharacter() { + return '*'; + } + + @Override + public char getClosingCharacter() { + return '*'; + } + + @Override + public int getMinLength() { + return 1; + } + + @Override + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + return 0; + } + }).build(); + + HtmlRenderer renderer = HtmlRenderer.builder().build(); + assertThat(renderer.render(parser.parse("*hello*"))).isEqualTo("

<p>*hello*</p>

\n"); + } + + @Test + public void overrideDelimiterProcessorCanSwitchAsteriskAndUnderscoreEmphasisSemantics() { + Parser parser = Parser.builder() + .overrideDelimiterProcessor(new DelimiterProcessor() { + @Override + public char getOpeningCharacter() { + return '*'; + } + + @Override + public char getClosingCharacter() { + return '*'; + } + + @Override + public int getMinLength() { + return 1; + } + + @Override + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + Text opener = openingRun.getOpener(); + Node closer = closingRun.getCloser(); + StrongEmphasis emphasis = new StrongEmphasis("*"); + for (Node node = opener.getNext(); node != closer; ) { + Node next = node.getNext(); + emphasis.appendChild(node); + node = next; + } + opener.insertAfter(emphasis); + return 1; + } + }) + .overrideDelimiterProcessor(new DelimiterProcessor() { + @Override + public char getOpeningCharacter() { + return '_'; + } + + @Override + public char getClosingCharacter() { + return '_'; + } + + @Override + public int getMinLength() { + return 1; + } + + @Override + public int process(DelimiterRun openingRun, DelimiterRun closingRun) { + Text opener = openingRun.getOpener(); + Node closer = closingRun.getCloser(); + Emphasis emphasis = new Emphasis("_"); + for (Node node = opener.getNext(); node != closer; ) { + Node next = node.getNext(); + emphasis.appendChild(node); + node = next; + } + opener.insertAfter(emphasis); + return 1; + } + }) + .build(); + + HtmlRenderer renderer = HtmlRenderer.builder().build(); + assertThat(renderer.render(parser.parse("*bold*"))).isEqualTo("

<p><strong>bold</strong></p>

\n"); + assertThat(renderer.render(parser.parse("_italic_"))).isEqualTo("

<p><em>italic</em></p>

\n"); + } + @Test public void threading() throws Exception { var parser = Parser.builder().build();