Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ all of them via methods on `Parser.Builder`
- Parsing of inline content can be extended/overridden with `customInlineContentParserFactory`
- Parsing of [delimiters](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis) in inline content can be
extended with `customDelimiterProcessor`
- Built-in delimiter processors can be replaced with `overrideDelimiterProcessor`,
e.g. to support an alternate emphasis syntax in which a single `_` means emphasis and
a single `*` means strong emphasis.
- Processing of links can be customized with `linkProcessor` and `linkMarker`

#### Thread-safety
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ public class DocumentParser implements ParserState {
private final InlineParserFactory inlineParserFactory;
private final List<InlineContentParserFactory> inlineContentParserFactories;
private final List<DelimiterProcessor> delimiterProcessors;
private final List<DelimiterProcessor> overrideDelimiterProcessors;
private final List<LinkProcessor> linkProcessors;
private final Set<Character> linkMarkers;
private final IncludeSourceSpans includeSourceSpans;
Expand All @@ -85,12 +86,13 @@ public class DocumentParser implements ParserState {

public DocumentParser(List<BlockParserFactory> blockParserFactories, InlineParserFactory inlineParserFactory,
List<InlineContentParserFactory> inlineContentParserFactories, List<DelimiterProcessor> delimiterProcessors,
List<LinkProcessor> linkProcessors, Set<Character> linkMarkers,
IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) {
List<DelimiterProcessor> overrideDelimiterProcessors, List<LinkProcessor> linkProcessors,
Set<Character> linkMarkers, IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) {
this.blockParserFactories = blockParserFactories;
this.inlineParserFactory = inlineParserFactory;
this.inlineContentParserFactories = inlineContentParserFactories;
this.delimiterProcessors = delimiterProcessors;
this.overrideDelimiterProcessors = overrideDelimiterProcessors;
this.linkProcessors = linkProcessors;
this.linkMarkers = linkMarkers;
this.includeSourceSpans = includeSourceSpans;
Expand Down Expand Up @@ -481,7 +483,8 @@ private BlockStartImpl findBlockStart(BlockParser blockParser) {
* Walk through a block & children recursively, parsing string content into inline content where appropriate.
*/
private void processInlines() {
var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, definitions);
var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors,
overrideDelimiterProcessors, linkProcessors, linkMarkers, definitions);
var inlineParser = inlineParserFactory.create(context);

for (var blockParser : allBlockParsers) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@ public class InlineParserContextImpl implements InlineParserContext {

private final List<InlineContentParserFactory> inlineContentParserFactories;
private final List<DelimiterProcessor> delimiterProcessors;
private final List<DelimiterProcessor> overrideDelimiterProcessors;
private final List<LinkProcessor> linkProcessors;
private final Set<Character> linkMarkers;
private final Definitions definitions;

public InlineParserContextImpl(List<InlineContentParserFactory> inlineContentParserFactories,
List<DelimiterProcessor> delimiterProcessors,
List<DelimiterProcessor> overrideDelimiterProcessors,
List<LinkProcessor> linkProcessors,
Set<Character> linkMarkers,
Definitions definitions) {
this.inlineContentParserFactories = inlineContentParserFactories;
this.delimiterProcessors = delimiterProcessors;
this.overrideDelimiterProcessors = overrideDelimiterProcessors;
this.linkProcessors = linkProcessors;
this.linkMarkers = linkMarkers;
this.definitions = definitions;
Expand All @@ -39,6 +42,11 @@ public List<DelimiterProcessor> getCustomDelimiterProcessors() {
return delimiterProcessors;
}

/**
 * Returns the override delimiter processors this context was constructed with.
 */
@Override
public List<DelimiterProcessor> getOverrideDelimiterProcessors() {
    return this.overrideDelimiterProcessors;
}

@Override
public List<LinkProcessor> getCustomLinkProcessors() {
return linkProcessors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class InlineParserImpl implements InlineParser, InlineParserState {
public InlineParserImpl(InlineParserContext context) {
this.context = context;
this.inlineContentParserFactories = calculateInlineContentParserFactories(context.getCustomInlineContentParserFactories());
this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors());
this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors(), context.getOverrideDelimiterProcessors());
this.linkProcessors = calculateLinkProcessors(context.getCustomLinkProcessors());
this.linkMarkers = calculateLinkMarkers(context.getCustomLinkMarkers());
this.specialCharacters = calculateSpecialCharacters(linkMarkers, this.delimiterProcessors.keySet(), this.inlineContentParserFactories);
Expand All @@ -66,13 +66,51 @@ private List<LinkProcessor> calculateLinkProcessors(List<LinkProcessor> linkProc
return list;
}

private static Map<Character, DelimiterProcessor> calculateDelimiterProcessors(List<DelimiterProcessor> delimiterProcessors) {
/**
 * Builds the lookup table from delimiter character to the processor responsible for it.
 * <p>
 * Registration order matters: the built-in asterisk and underscore processors are added first,
 * then the override processors (which may take the place of entries already present), and
 * finally the custom processors.
 *
 * @param delimiterProcessors custom processors, added last
 * @param overrideDelimiterProcessors processors allowed to replace existing entries
 * @return the populated map, keyed by delimiter character
 */
private static Map<Character, DelimiterProcessor> calculateDelimiterProcessors(List<DelimiterProcessor> delimiterProcessors,
        List<DelimiterProcessor> overrideDelimiterProcessors) {
    Map<Character, DelimiterProcessor> processorsByChar = new HashMap<>();
    List<DelimiterProcessor> builtIns = List.of(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor());
    addDelimiterProcessors(builtIns, processorsByChar);
    addOverrideDelimiterProcessors(overrideDelimiterProcessors, processorsByChar);
    addDelimiterProcessors(delimiterProcessors, processorsByChar);
    return processorsByChar;
}

/**
 * Registers every override processor under its opening character and, when that differs,
 * under its closing character as well. Each registration goes through
 * {@code replaceOrAddDelimiterProcessorForChar}.
 *
 * @param delimiterProcessors the override processors to register
 * @param map the character-to-processor table being built; modified in place
 */
private static void addOverrideDelimiterProcessors(Iterable<DelimiterProcessor> delimiterProcessors,
        Map<Character, DelimiterProcessor> map) {
    for (DelimiterProcessor override : delimiterProcessors) {
        char open = override.getOpeningCharacter();
        char close = override.getClosingCharacter();
        replaceOrAddDelimiterProcessorForChar(open, override, map);
        if (close != open) {
            replaceOrAddDelimiterProcessorForChar(close, override, map);
        }
    }
}

/**
 * Installs {@code delimiterProcessor} for {@code delimiterChar}, taking the place of a
 * conflicting entry where there is one.
 * <ul>
 * <li>Entry is a {@code StaggeredDelimiterProcessor}: the replacement is delegated to it.</li>
 * <li>No entry yet, or the entry has the same minimum length: the new processor is stored.</li>
 * <li>Otherwise: the entry and the new processor are combined in a new
 * {@code StaggeredDelimiterProcessor}.</li>
 * </ul>
 *
 * @param delimiterChar the character to register the processor under
 * @param delimiterProcessor the processor to install
 * @param delimiterProcessors the character-to-processor table; modified in place
 */
private static void replaceOrAddDelimiterProcessorForChar(char delimiterChar,
        DelimiterProcessor delimiterProcessor,
        Map<Character, DelimiterProcessor> delimiterProcessors) {
    DelimiterProcessor current = delimiterProcessors.get(delimiterChar);
    if (current instanceof StaggeredDelimiterProcessor) {
        ((StaggeredDelimiterProcessor) current).replace(delimiterProcessor);
    } else if (current == null || current.getMinLength() == delimiterProcessor.getMinLength()) {
        delimiterProcessors.put(delimiterChar, delimiterProcessor);
    } else {
        // NOTE(review): this branch is also reached for processors whose opening and closing
        // characters differ; confirm that staggering is valid in that case.
        StaggeredDelimiterProcessor combined = new StaggeredDelimiterProcessor(delimiterChar);
        combined.add(current);
        combined.add(delimiterProcessor);
        delimiterProcessors.put(delimiterChar, combined);
    }
}

private static void addDelimiterProcessors(Iterable<DelimiterProcessor> delimiterProcessors, Map<Character, DelimiterProcessor> map) {
for (DelimiterProcessor delimiterProcessor : delimiterProcessors) {
char opening = delimiterProcessor.getOpeningCharacter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ void add(DelimiterProcessor dp) {
}
}

/**
 * Swaps in {@code dp} for the first registered processor that has the same minimum length.
 * If no registered processor matches, {@code dp} is handed to {@code add} instead.
 *
 * @param dp the processor to install
 */
void replace(DelimiterProcessor dp) {
    final int wanted = dp.getMinLength();
    for (ListIterator<DelimiterProcessor> cursor = processors.listIterator(); cursor.hasNext(); ) {
        if (cursor.next().getMinLength() == wanted) {
            cursor.set(dp);
            return;
        }
    }
    add(dp);
}

private DelimiterProcessor findProcessor(int len) {
for (DelimiterProcessor p : processors) {
if (p.getMinLength() <= len) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ public interface InlineParserContext {
*/
List<DelimiterProcessor> getCustomDelimiterProcessors();

/**
 * Returns the delimiter processors that were registered as overrides.
 * <p>
 * Declared as a default method so that existing implementations of this interface keep
 * compiling; such implementations report no overrides unless they override this method.
 *
 * @return delimiter processors that have been configured with
 * {@link Parser.Builder#overrideDelimiterProcessor(DelimiterProcessor)}; an empty list by default
 */
default List<DelimiterProcessor> getOverrideDelimiterProcessors() {
    return List.of();
}

/**
* @return custom link processors that have been configured with {@link Parser.Builder#linkProcessor}.
*/
Expand Down
26 changes: 23 additions & 3 deletions commonmark/src/main/java/org/commonmark/parser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import java.io.Reader;
import java.util.*;


/**
* Parses input text to a tree of nodes.
* <p>
Expand All @@ -32,6 +31,7 @@ public class Parser {
private final List<BlockParserFactory> blockParserFactories;
private final List<InlineContentParserFactory> inlineContentParserFactories;
private final List<DelimiterProcessor> delimiterProcessors;
private final List<DelimiterProcessor> overrideDelimiterProcessors;
private final List<LinkProcessor> linkProcessors;
private final Set<Character> linkMarkers;
private final InlineParserFactory inlineParserFactory;
Expand All @@ -45,6 +45,7 @@ private Parser(Builder builder) {
this.postProcessors = builder.postProcessors;
this.inlineContentParserFactories = builder.inlineContentParserFactories;
this.delimiterProcessors = builder.delimiterProcessors;
this.overrideDelimiterProcessors = builder.overrideDelimiterProcessors;
this.linkProcessors = builder.linkProcessors;
this.linkMarkers = builder.linkMarkers;
this.includeSourceSpans = builder.includeSourceSpans;
Expand All @@ -53,7 +54,8 @@ private Parser(Builder builder) {
// Try to construct an inline parser. Invalid configuration might result in an exception, which we want to
// detect as soon as possible.
var context = new InlineParserContextImpl(
inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, new Definitions());
inlineContentParserFactories, delimiterProcessors, overrideDelimiterProcessors,
linkProcessors, linkMarkers, new Definitions());
this.inlineParserFactory.create(context);
}

Expand Down Expand Up @@ -108,7 +110,7 @@ public Node parseReader(Reader input) throws IOException {

private DocumentParser createDocumentParser() {
return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories,
delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers);
delimiterProcessors, overrideDelimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers);
}

private Node postProcess(Node document) {
Expand All @@ -125,6 +127,7 @@ public static class Builder {
private final List<BlockParserFactory> blockParserFactories = new ArrayList<>();
private final List<InlineContentParserFactory> inlineContentParserFactories = new ArrayList<>();
private final List<DelimiterProcessor> delimiterProcessors = new ArrayList<>();
private final List<DelimiterProcessor> overrideDelimiterProcessors = new ArrayList<>();
private final List<LinkProcessor> linkProcessors = new ArrayList<>();
private final List<PostProcessor> postProcessors = new ArrayList<>();
private final Set<Character> linkMarkers = new HashSet<>();
Expand Down Expand Up @@ -273,6 +276,23 @@ public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
return this;
}

/**
 * Register a delimiter processor that takes the place of the built-in processor for the same
 * delimiter character.
 * <p>
 * Use this to override core syntax such as the parsing of emphasis and strong emphasis.
 * The built-in processor with the same delimiter character and minimum length is replaced.
 * When a processor exists for the same delimiter character but with a different minimum length,
 * the two are combined using the standard staggered delimiter processor behavior.
 *
 * @param delimiterProcessor a delimiter processor implementation, must not be {@code null}
 * @return {@code this}
 * @throws NullPointerException if {@code delimiterProcessor} is {@code null}
 */
public Builder overrideDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
    overrideDelimiterProcessors.add(
            Objects.requireNonNull(delimiterProcessor, "delimiterProcessor must not be null"));
    return this;
}

/**
* Add a custom link/image processor for inline parsing.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ public List<DelimiterProcessor> getCustomDelimiterProcessors() {
return inlineParserContext.getCustomDelimiterProcessors();
}

// Pass-through: forwards the override delimiter processors of the wrapped context unchanged.
@Override
public List<DelimiterProcessor> getOverrideDelimiterProcessors() {
    List<DelimiterProcessor> overrides = inlineParserContext.getOverrideDelimiterProcessors();
    return overrides;
}

@Override
public List<LinkProcessor> getCustomLinkProcessors() {
return inlineParserContext.getCustomLinkProcessors();
Expand Down
101 changes: 100 additions & 1 deletion commonmark/src/test/java/org/commonmark/test/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import org.commonmark.node.*;
import org.commonmark.parser.*;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import org.commonmark.parser.delimiter.DelimiterRun;
import org.commonmark.renderer.html.HtmlRenderer;
import org.commonmark.renderer.markdown.MarkdownRenderer;
import org.commonmark.testutil.TestResources;
Expand All @@ -13,7 +15,6 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
Expand Down Expand Up @@ -111,6 +112,104 @@ public InlineParser create(InlineParserContext inlineParserContext) {
assertThat(parser.parse(input).getFirstChild().getFirstChild()).isInstanceOf(ThematicBreak.class);
}

/**
 * Registers an override for {@code *} (minimum length 1) whose {@code process} always returns 0
 * and expects the asterisks in {@code *hello*} to be rendered as literal text.
 */
@Test
public void overrideDelimiterProcessorReplacesBuiltInProcessor() {
    DelimiterProcessor inertAsterisk = new DelimiterProcessor() {
        @Override
        public char getOpeningCharacter() {
            return '*';
        }

        @Override
        public char getClosingCharacter() {
            return '*';
        }

        @Override
        public int getMinLength() {
            return 1;
        }

        @Override
        public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
            return 0;
        }
    };
    var parser = Parser.builder().overrideDelimiterProcessor(inertAsterisk).build();

    var html = HtmlRenderer.builder().build().render(parser.parse("*hello*"));
    assertThat(html).isEqualTo("<p>*hello*</p>\n");
}

/**
 * Registers two overrides, {@code *} producing {@link StrongEmphasis} and {@code _} producing
 * {@link Emphasis}, each with minimum length 1, and checks the rendered HTML for both.
 */
@Test
public void overrideDelimiterProcessorCanSwitchAsteriskAndUnderscoreEmphasisSemantics() {
    DelimiterProcessor asteriskAsStrong = new DelimiterProcessor() {
        @Override
        public char getOpeningCharacter() {
            return '*';
        }

        @Override
        public char getClosingCharacter() {
            return '*';
        }

        @Override
        public int getMinLength() {
            return 1;
        }

        @Override
        public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
            Text open = openingRun.getOpener();
            Node close = closingRun.getCloser();
            StrongEmphasis strong = new StrongEmphasis("*");
            // Move every node between the two delimiter texts into the new wrapper node.
            Node current = open.getNext();
            while (current != close) {
                Node following = current.getNext();
                strong.appendChild(current);
                current = following;
            }
            open.insertAfter(strong);
            return 1;
        }
    };

    DelimiterProcessor underscoreAsEmphasis = new DelimiterProcessor() {
        @Override
        public char getOpeningCharacter() {
            return '_';
        }

        @Override
        public char getClosingCharacter() {
            return '_';
        }

        @Override
        public int getMinLength() {
            return 1;
        }

        @Override
        public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
            Text open = openingRun.getOpener();
            Node close = closingRun.getCloser();
            Emphasis em = new Emphasis("_");
            // Move every node between the two delimiter texts into the new wrapper node.
            Node current = open.getNext();
            while (current != close) {
                Node following = current.getNext();
                em.appendChild(current);
                current = following;
            }
            open.insertAfter(em);
            return 1;
        }
    };

    Parser parser = Parser.builder()
            .overrideDelimiterProcessor(asteriskAsStrong)
            .overrideDelimiterProcessor(underscoreAsEmphasis)
            .build();

    HtmlRenderer html = HtmlRenderer.builder().build();
    assertThat(html.render(parser.parse("*bold*"))).isEqualTo("<p><strong>bold</strong></p>\n");
    assertThat(html.render(parser.parse("_italic_"))).isEqualTo("<p><em>italic</em></p>\n");
}

@Test
public void threading() throws Exception {
var parser = Parser.builder().build();
Expand Down
Loading