diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 19ad24bb0..b32794271 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,46 +9,53 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- java: [1.8, 11]
+ java: [11, 17, 21, 25]
steps:
- name: Checkout sources
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Set up JDK
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v4
with:
java-version: ${{ matrix.java }}
+ distribution: 'zulu'
- name: Build
run: mvn -B package javadoc:javadoc
coverage:
runs-on: ubuntu-latest
+ if: ${{ github.event_name == 'push' }}
steps:
- name: Checkout sources
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Set up JDK
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v4
with:
- java-version: 1.8
+ java-version: 11
+ distribution: 'zulu'
- name: Build with coverage
run: mvn -B -Pcoverage clean test jacoco:report-aggregate
- name: Publish coverage
- uses: codecov/codecov-action@v1
+ uses: codecov/codecov-action@v4
+ with:
+ fail_ci_if_error: true
+ token: ${{ secrets.CODECOV_TOKEN }}
android-compatibility:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Set up JDK
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v4
with:
- java-version: 1.8
+ java-version: 11
+ distribution: 'zulu'
- name: Android Lint checks
run: cd commonmark-android-test && ./gradlew :app:lint
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 4edf451c0..c0531ca55 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -14,15 +14,17 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Set up Maven Central repository
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v4
with:
- java-version: 1.8
- server-id: ossrh
- server-username: MAVEN_USERNAME # env variable to use for username in release
- server-password: MAVEN_PASSWORD # env variable to use for password in release
+ java-version: 24
+ distribution: 'zulu'
+ # See https://central.sonatype.org/publish/publish-portal-maven/
+ server-id: central
+ server-username: CENTRAL_USERNAME # env variable to use for username in release
+ server-password: CENTRAL_PASSWORD # env variable to use for password in release
gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }}
gpg-passphrase: MAVEN_GPG_PASSPHRASE # env variable to use for passphrase in release
@@ -36,6 +38,6 @@ jobs:
mvn -B -Dusername=${{ secrets.GH_USERNAME }} -Dpassword=${{ secrets.GH_ACCESS_TOKEN }} release:prepare
mvn -B release:perform
env:
- MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
- MAVEN_PASSWORD: ${{ secrets.OSSRH_PASSWORD }}
+ CENTRAL_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
+ CENTRAL_PASSWORD: ${{ secrets.CENTRAL_PASSWORD }}
MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }}
diff --git a/.gitignore b/.gitignore
index a156931f0..d998d8890 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@
# Maven
target/
+
+# macOS
+.DS_Store
diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties
new file mode 100644
index 000000000..4d245050f
--- /dev/null
+++ b/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+wrapperVersion=3.3.2
+distributionType=only-script
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18a4adbdb..9c5c67268 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,212 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html),
with the exception that 0.x versions can break between minor versions.
+## [Unreleased]
+### Added
+- Allow customizing HTML attributes for alert title `
+ * The {@link #getLabel() label} is the text in brackets after {@code ^}, so {@code foo} in the example. The contents
+ * of the footnote are child nodes of the definition, a {@link org.commonmark.node.Paragraph} in the example.
+ *
+ * Footnote definitions are parsed even if there's no corresponding {@link FootnoteReference}.
+ */
+public class FootnoteDefinition extends CustomBlock {
+
+    /**
+     * The label of this definition. Assigned once in the constructor and never changed afterwards.
+     */
+    private final String label;
+
+    /**
+     * @param label the label to store for this definition
+     */
+    public FootnoteDefinition(String label) {
+        this.label = label;
+    }
+
+    /**
+     * @return the label that was passed to the constructor
+     */
+    public String getLabel() {
+        return label;
+    }
+}
+
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java
new file mode 100644
index 000000000..61dcf8626
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java
@@ -0,0 +1,21 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.node.CustomNode;
+
+/**
+ * A footnote reference, e.g. {@code [^foo]} in {@code Some text with a footnote[^foo]}.
+ * <p>
+ * The {@link #getLabel() label} is the text within brackets after {@code ^}, so {@code foo} in the example. It needs to
+ * match the label of a corresponding {@link FootnoteDefinition} for the footnote to be parsed.
+ */
+public class FootnoteReference extends CustomNode {
+
+    /**
+     * The label of this reference. Assigned once in the constructor and never changed afterwards.
+     */
+    private final String label;
+
+    /**
+     * @param label the label to store for this reference
+     */
+    public FootnoteReference(String label) {
+        this.label = label;
+    }
+
+    /**
+     * @return the label that was passed to the constructor
+     */
+    public String getLabel() {
+        return label;
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java
new file mode 100644
index 000000000..dd532fa34
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java
@@ -0,0 +1,105 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.Extension;
+import org.commonmark.ext.footnotes.internal.*;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+
+import java.util.Set;
+
+/**
+ * Extension for footnotes with syntax like GitHub Flavored Markdown:
+ * <pre><code>
+ * Some text with a footnote[^1].
+ *
+ * [^1]: The text of the footnote.
+ * </code></pre>
+ * The <code>[^1]</code> is a {@link FootnoteReference}, with "1" being the label.
+ * <p>
+ * The line with <code>[^1]: ...</code> is a {@link FootnoteDefinition}, with the contents as child nodes (can be a
+ * paragraph like in the example, or other blocks like lists).
+ * <p>
+ * All the footnotes (definitions) will be rendered in a list at the end of a document, no matter where they appear in
+ * the source. The footnotes will be numbered starting from 1, then 2, etc, depending on the order in which they appear
+ * in the text (and not dependent on the label). The footnote reference is a link to the footnote, and from the footnote
+ * there is a link back to the reference (or multiple).
+ * <p>
+ * There is also optional support for inline footnotes, use {@link #builder()} and then set {@link Builder#inlineFootnotes}.
+ *
+ * @see <a href="https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#footnotes">GitHub docs for footnotes</a>
+ */
+public class FootnotesExtension implements Parser.ParserExtension,
+        HtmlRenderer.HtmlRendererExtension,
+        MarkdownRenderer.MarkdownRendererExtension {
+
+    /**
+     * Whether the {@code ^} link marker is registered with the parser, see {@link #extend(Parser.Builder)}.
+     */
+    private final boolean inlineFootnotes;
+
+    private FootnotesExtension(boolean inlineFootnotes) {
+        this.inlineFootnotes = inlineFootnotes;
+    }
+
+    /**
+     * The extension with the default configuration (no support for inline footnotes).
+     */
+    public static Extension create() {
+        return builder().build();
+    }
+
+    /**
+     * @return a new {@link Builder} for configuring the extension; inline footnotes are disabled until enabled on it
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Registers the footnote definition block parser and the footnote link processor. The {@code ^} link marker is
+     * only registered when inline footnotes were enabled on the builder.
+     */
+    @Override
+    public void extend(Parser.Builder parserBuilder) {
+        parserBuilder
+                .customBlockParserFactory(new FootnoteBlockParser.Factory())
+                .linkProcessor(new FootnoteLinkProcessor());
+        if (inlineFootnotes) {
+            parserBuilder.linkMarker('^');
+        }
+    }
+
+    /**
+     * Registers {@link FootnoteHtmlNodeRenderer} for HTML output.
+     */
+    @Override
+    public void extend(HtmlRenderer.Builder rendererBuilder) {
+        rendererBuilder.nodeRendererFactory(FootnoteHtmlNodeRenderer::new);
+    }
+
+    /**
+     * Registers {@link FootnoteMarkdownNodeRenderer} for Markdown output.
+     */
+    @Override
+    public void extend(MarkdownRenderer.Builder rendererBuilder) {
+        // An anonymous class (not a lambda) because the factory interface has two methods to implement.
+        rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+            @Override
+            public NodeRenderer create(MarkdownNodeRendererContext context) {
+                return new FootnoteMarkdownNodeRenderer(context);
+            }
+
+            @Override
+            public Set<Character> getSpecialCharacters() {
+                // This extension declares no additional special characters.
+                return Set.of();
+            }
+        });
+    }
+
+    /**
+     * Builder for a configured {@link FootnotesExtension}.
+     */
+    public static class Builder {
+
+        private boolean inlineFootnotes = false;
+
+        /**
+         * Enable support for inline footnotes without definitions, e.g.:
+         * <pre>
+         * Some text^[this is an inline footnote]
+         * </pre>
+         *
+         * @param inlineFootnotes {@code true} to enable inline footnotes, {@code false} (the default) to disable
+         * @return {@code this}
+         */
+        public Builder inlineFootnotes(boolean inlineFootnotes) {
+            this.inlineFootnotes = inlineFootnotes;
+            return this;
+        }
+
+        /**
+         * @return an extension instance carrying the current configuration of this builder
+         */
+        public FootnotesExtension build() {
+            return new FootnotesExtension(inlineFootnotes);
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java
new file mode 100644
index 000000000..665d01936
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java
@@ -0,0 +1,6 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.node.CustomNode;
+
+/**
+ * An inline footnote, e.g. {@code ^[this is an inline footnote]} in {@code Some text^[this is an inline footnote]}.
+ * <p>
+ * Unlike a {@link FootnoteReference}, it has no label and no separate {@link FootnoteDefinition}. The node carries no
+ * state of its own.
+ */
+public class InlineFootnote extends CustomNode {
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java
new file mode 100644
index 000000000..110bdef20
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java
@@ -0,0 +1,105 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.node.Block;
+import org.commonmark.node.DefinitionMap;
+import org.commonmark.parser.block.*;
+import org.commonmark.text.Characters;
+
+import java.util.List;
+
+/**
+ * Parser for a single {@link FootnoteDefinition} block.
+ */
+public class FootnoteBlockParser extends AbstractBlockParser {
+
+    private final FootnoteDefinition block;
+
+    /**
+     * @param label the label for the {@link FootnoteDefinition} created by this parser
+     */
+    public FootnoteBlockParser(String label) {
+        block = new FootnoteDefinition(label);
+    }
+
+    @Override
+    public Block getBlock() {
+        return block;
+    }
+
+    @Override
+    public boolean isContainer() {
+        return true;
+    }
+
+    @Override
+    public boolean canContain(Block childBlock) {
+        return true;
+    }
+
+    @Override
+    public BlockContinue tryContinue(ParserState parserState) {
+        if (parserState.getIndent() >= 4) {
+            // It looks like content needs to be indented by 4 so that it's part of a footnote (instead of starting a new block).
+            return BlockContinue.atColumn(4);
+        } else if (parserState.isBlank()) {
+            // A blank line doesn't finish a footnote yet. If there's another line with indent >= 4 after it,
+            // that should result in another paragraph in this footnote definition.
+            return BlockContinue.atIndex(parserState.getIndex());
+        } else {
+            // We're not continuing to give other block parsers a chance to interrupt this definition.
+            // But if no other block parser applied (including another FootnotesBlockParser), we will
+            // accept the line via lazy continuation (same as a block quote).
+            return BlockContinue.none();
+        }
+    }
+
+    /**
+     * @return a single map containing this parser's definition, keyed by its label
+     */
+    @Override
+    public List<DefinitionMap<?>> getDefinitions() {
+        var map = new DefinitionMap<>(FootnoteDefinition.class);
+        map.putIfAbsent(block.getLabel(), block);
+        return List.of(map);
+    }
+
+    /**
+     * Starts a {@link FootnoteBlockParser} on lines of the form {@code [^label]:}, where the label is non-empty and
+     * contains no space, tab, CR, LF or NUL character, and the line is indented by less than 4.
+     */
+    public static class Factory implements BlockParserFactory {
+
+        @Override
+        public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
+            if (state.getIndent() >= 4) {
+                return BlockStart.none();
+            }
+            var index = state.getNextNonSpaceIndex();
+            var content = state.getLine().getContent();
+            // The bounds check comes first so that charAt is never called with an index past the end of the line.
+            if (index + 1 >= content.length() || content.charAt(index) != '[') {
+                return BlockStart.none();
+            }
+            index++;
+            if (index + 1 >= content.length() || content.charAt(index) != '^') {
+                return BlockStart.none();
+            }
+            // Now at first label character (if any)
+            index++;
+            var labelStart = index;
+
+            for (index = labelStart; index < content.length(); index++) {
+                var c = content.charAt(index);
+                switch (c) {
+                    case ']':
+                        // The label must be non-empty and the closing bracket must be directly followed by a colon.
+                        if (index > labelStart && index + 1 < content.length() && content.charAt(index + 1) == ':') {
+                            var label = content.subSequence(labelStart, index).toString();
+                            // After the colon, any number of spaces is skipped (not part of the content)
+                            var afterSpaces = Characters.skipSpaceTab(content, index + 2, content.length());
+                            return BlockStart.of(new FootnoteBlockParser(label)).atIndex(afterSpaces);
+                        } else {
+                            return BlockStart.none();
+                        }
+                    case ' ':
+                    case '\r':
+                    case '\n':
+                    case '\0':
+                    case '\t':
+                        // These characters are not allowed inside a label.
+                        return BlockStart.none();
+                }
+            }
+
+            // Reached the end of the line without finding a closing bracket.
+            return BlockStart.none();
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java
new file mode 100644
index 000000000..70eb048a3
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java
@@ -0,0 +1,391 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.ext.footnotes.FootnoteReference;
+import org.commonmark.ext.footnotes.InlineFootnote;
+import org.commonmark.node.*;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlWriter;
+
+import java.util.*;
+import java.util.function.Consumer;
+
+/**
+ * HTML rendering for footnotes.
+ *
+ * Aims to match the rendering of cmark-gfm (which is slightly different from GitHub's when it comes to class
+ * attributes, not sure why).
+ *
+ * Some notes on how rendering works:
+ *
+ *
Footnotes are numbered according to the order of references, starting at 1
+ *
Definitions are rendered at the end of the document, regardless of where the definition was in the source
+ *
Definitions are ordered by number
+ *
Definitions have links back to their references (one or more)
+ *
+ *
+ *
Nested footnotes
+ * Text in footnote definitions can reference other footnotes, even ones that aren't referenced in the main text.
+ * This makes them tricky because it's not enough to just go through the main text for references.
+ * And before we can render a definition, we need to know all references (because we add links back to references).
+ *
+ * In other words, footnotes form a directed graph. Footnotes can reference each other so cycles are possible too.
+ *
+ * One way to implement it, which is what cmark-gfm does, is to go through the whole document (including definitions)
+ * and find all references in order. That guarantees that all definitions are found, but it has strange results for
+ * ordering or when the reference is in an unreferenced definition, see tests. In graph terms, it renders all
+ * definitions that have an incoming edge, no matter whether they are connected to the main text or not.
+ *
+ * The way we implement it:
+ *
+ *
Start with the references in the main text; we can render them as we go
+ *
After the main text is rendered, we have the referenced definitions, but there might be more from definition text
+ *
To find the remaining definitions, we visit the definitions from before to look at references
+ *
Repeat (breadth-first search) until we've found all definitions (note that we can't render before that's done because of backrefs)
+ *
Now render the definitions (and any references inside)
+ *
+ * This means we only render definitions whose references are actually rendered, and in a meaningful order (all main
+ * text footnotes first, then any nested ones).
+ */
+public class FootnoteHtmlNodeRenderer implements NodeRenderer {
+
+ private final HtmlWriter html;
+ private final HtmlNodeRendererContext context;
+
+ /**
+ * All definitions (even potentially unused ones), for looking up references
+ */
+ private DefinitionMap definitionMap;
+
+ /**
+ * Definitions that were referenced, in order in which they should be rendered.
+ */
+ private final Map referencedDefinitions = new LinkedHashMap<>();
+
+ /**
+ * Information about references that should be rendered as footnotes. This doesn't contain all references, just the
+ * ones from inside definitions.
+ */
+ private final Map references = new HashMap<>();
+
+ public FootnoteHtmlNodeRenderer(HtmlNodeRendererContext context) {
+ this.html = context.getWriter();
+ this.context = context;
+ }
+
+ @Override
+ public Set> getNodeTypes() {
+ return Set.of(FootnoteReference.class, InlineFootnote.class, FootnoteDefinition.class);
+ }
+
+ @Override
+ public void beforeRoot(Node rootNode) {
+ // Collect all definitions first, so we can look them up when encountering a reference later.
+ var visitor = new DefinitionVisitor();
+ rootNode.accept(visitor);
+ definitionMap = visitor.definitions;
+ }
+
+ @Override
+ public void render(Node node) {
+ if (node instanceof FootnoteReference) {
+ // This is called for all references, even ones inside definitions that we render at the end.
+ // Inside definitions, we have registered the reference already.
+ var ref = (FootnoteReference) node;
+ // Use containsKey because if value is null, we don't need to try registering again.
+ var info = references.containsKey(ref) ? references.get(ref) : tryRegisterReference(ref);
+ if (info != null) {
+ renderReference(ref, info);
+ } else {
+ // A reference without a corresponding definition is rendered as plain text
+ html.text("[^" + ref.getLabel() + "]");
+ }
+ } else if (node instanceof InlineFootnote) {
+ var info = references.get(node);
+ if (info == null) {
+ info = registerReference(node, null);
+ }
+ renderReference(node, info);
+ }
+ }
+
+ @Override
+ public void afterRoot(Node rootNode) {
+ // Now render the referenced definitions if there are any.
+ if (referencedDefinitions.isEmpty()) {
+ return;
+ }
+
+ var firstDef = referencedDefinitions.keySet().iterator().next();
+ var attrs = new LinkedHashMap();
+ attrs.put("class", "footnotes");
+ attrs.put("data-footnotes", null);
+ html.tag("section", context.extendAttributes(firstDef, "section", attrs));
+ html.line();
+ html.tag("ol");
+ html.line();
+
+ // Check whether there are any footnotes inside the definitions that we're about to render. For those, we might
+ // need to render more definitions. So do a breadth-first search to find all relevant definitions.
+ var check = new LinkedList<>(referencedDefinitions.keySet());
+ while (!check.isEmpty()) {
+ var def = check.removeFirst();
+ def.accept(new ShallowReferenceVisitor(def, node -> {
+ if (node instanceof FootnoteReference) {
+ var ref = (FootnoteReference) node;
+ var d = definitionMap.get(ref.getLabel());
+ if (d != null) {
+ if (!referencedDefinitions.containsKey(d)) {
+ check.addLast(d);
+ }
+ references.put(ref, registerReference(d, d.getLabel()));
+ }
+ } else if (node instanceof InlineFootnote) {
+ check.addLast(node);
+ references.put(node, registerReference(node, null));
+ }
+ }));
+ }
+
+ for (var entry : referencedDefinitions.entrySet()) {
+ // This will also render any footnote references inside definitions
+ renderDefinition(entry.getKey(), entry.getValue());
+ }
+
+ html.tag("/ol");
+ html.line();
+ html.tag("/section");
+ html.line();
+ }
+
+ private ReferenceInfo tryRegisterReference(FootnoteReference ref) {
+ var def = definitionMap.get(ref.getLabel());
+ if (def == null) {
+ return null;
+ }
+ return registerReference(def, def.getLabel());
+ }
+
+ private ReferenceInfo registerReference(Node node, String label) {
+ // The first referenced definition gets number 1, second one 2, etc.
+ var referencedDef = referencedDefinitions.computeIfAbsent(node, k -> {
+ var num = referencedDefinitions.size() + 1;
+ var key = definitionKey(label, num);
+ return new ReferencedDefinition(num, key);
+ });
+ var definitionNumber = referencedDef.definitionNumber;
+ // The reference number for that particular definition. E.g. if there's two references for the same definition,
+ // the first one is 1, the second one 2, etc. This is needed to give each reference a unique ID so that each
+ // reference can get its own backlink from the definition.
+ var refNumber = referencedDef.references.size() + 1;
+ var definitionKey = referencedDef.definitionKey;
+ var id = referenceId(definitionKey, refNumber);
+ referencedDef.references.add(id);
+
+ return new ReferenceInfo(id, definitionId(definitionKey), definitionNumber);
+ }
+
+ private void renderReference(Node node, ReferenceInfo referenceInfo) {
+ html.tag("sup", context.extendAttributes(node, "sup", Map.of("class", "footnote-ref")));
+
+ var href = "#" + referenceInfo.definitionId;
+ var attrs = new LinkedHashMap();
+ attrs.put("href", href);
+ attrs.put("id", referenceInfo.id);
+ attrs.put("data-footnote-ref", null);
+ html.tag("a", context.extendAttributes(node, "a", attrs));
+ html.raw(String.valueOf(referenceInfo.definitionNumber));
+ html.tag("/a");
+ html.tag("/sup");
+ }
+
+ private void renderDefinition(Node def, ReferencedDefinition referencedDefinition) {
+ var attrs = new LinkedHashMap();
+ attrs.put("id", definitionId(referencedDefinition.definitionKey));
+ html.tag("li", context.extendAttributes(def, "li", attrs));
+ html.line();
+
+ if (def.getLastChild() instanceof Paragraph) {
+ // Add backlinks into last paragraph before
. This is what GFM does.
+ var lastParagraph = (Paragraph) def.getLastChild();
+ var node = def.getFirstChild();
+ while (node != lastParagraph) {
+ if (node instanceof Paragraph) {
+ // Because we're manually rendering the
for the last paragraph, do the same for all other
+ // paragraphs for consistency (Paragraph rendering might be overwritten by a custom renderer).
+ html.tag("p", context.extendAttributes(node, "p", Map.of()));
+ renderChildren(node);
+ html.tag("/p");
+ html.line();
+ } else {
+ context.render(node);
+ }
+ node = node.getNext();
+ }
+
+ html.tag("p", context.extendAttributes(lastParagraph, "p", Map.of()));
+ renderChildren(lastParagraph);
+ html.raw(" ");
+ renderBackrefs(def, referencedDefinition);
+ html.tag("/p");
+ html.line();
+ } else if (def instanceof InlineFootnote) {
+ html.tag("p", context.extendAttributes(def, "p", Map.of()));
+ renderChildren(def);
+ html.raw(" ");
+ renderBackrefs(def, referencedDefinition);
+ html.tag("/p");
+ html.line();
+ } else {
+ renderChildren(def);
+ html.line();
+ renderBackrefs(def, referencedDefinition);
+ }
+
+ html.tag("/li");
+ html.line();
+ }
+
+ private void renderBackrefs(Node def, ReferencedDefinition referencedDefinition) {
+ var refs = referencedDefinition.references;
+ for (int i = 0; i < refs.size(); i++) {
+ var ref = refs.get(i);
+ var refNumber = i + 1;
+ var idx = referencedDefinition.definitionNumber + (refNumber > 1 ? ("-" + refNumber) : "");
+
+ var attrs = new LinkedHashMap();
+ attrs.put("href", "#" + ref);
+ attrs.put("class", "footnote-backref");
+ attrs.put("data-footnote-backref", null);
+ attrs.put("data-footnote-backref-idx", idx);
+ attrs.put("aria-label", "Back to reference " + idx);
+ html.tag("a", context.extendAttributes(def, "a", attrs));
+ if (refNumber > 1) {
+ html.tag("sup", context.extendAttributes(def, "sup", Map.of("class", "footnote-ref")));
+ html.raw(String.valueOf(refNumber));
+ html.tag("/sup");
+ }
+ // U+21A9 LEFTWARDS ARROW WITH HOOK
+ html.raw("\u21A9");
+ html.tag("/a");
+ if (i + 1 < refs.size()) {
+ html.raw(" ");
+ }
+ }
+ }
+
+ private String referenceId(String definitionKey, int number) {
+ return "fnref" + definitionKey + (number == 1 ? "" : ("-" + number));
+ }
+
+ private String definitionKey(String label, int number) {
+ // Named definitions use the pattern "fn-{name}" and inline definitions use "fn{number}" so as not to conflict.
+ // "fn{number}" is also what pandoc uses (for all types), starting with number 1.
+ if (label != null) {
+ return "-" + label;
+ } else {
+ return "" + number;
+ }
+ }
+
+ private String definitionId(String definitionKey) {
+ return "fn" + definitionKey;
+ }
+
+ private void renderChildren(Node parent) {
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext();
+ context.render(node);
+ node = next;
+ }
+ }
+
+ private static class DefinitionVisitor extends AbstractVisitor {
+
+ private final DefinitionMap definitions = new DefinitionMap<>(FootnoteDefinition.class);
+
+ @Override
+ public void visit(CustomBlock customBlock) {
+ if (customBlock instanceof FootnoteDefinition) {
+ var def = (FootnoteDefinition) customBlock;
+ definitions.putIfAbsent(def.getLabel(), def);
+ } else {
+ super.visit(customBlock);
+ }
+ }
+ }
+
+ /**
+ * Visit footnote references/inline footnotes inside the parent (but not the parent itself). We want a shallow visit
+ * because the caller wants to control when to descend.
+ */
+ private static class ShallowReferenceVisitor extends AbstractVisitor {
+ private final Node parent;
+ private final Consumer consumer;
+
+ private ShallowReferenceVisitor(Node parent, Consumer consumer) {
+ this.parent = parent;
+ this.consumer = consumer;
+ }
+
+ @Override
+ public void visit(CustomNode customNode) {
+ if (customNode instanceof FootnoteReference) {
+ consumer.accept(customNode);
+ } else if (customNode instanceof InlineFootnote) {
+ if (customNode == parent) {
+ // Descend into the parent (inline footnotes can contain inline footnotes)
+ super.visit(customNode);
+ } else {
+ // Don't descend here because we want to be shallow.
+ consumer.accept(customNode);
+ }
+ } else {
+ super.visit(customNode);
+ }
+ }
+ }
+
+ private static class ReferencedDefinition {
+ /**
+ * The definition number, starting from 1, and in order in which they're referenced.
+ */
+ final int definitionNumber;
+ /**
+ * The unique key of the definition. Together with a static prefix it forms the ID used in the HTML.
+ */
+ final String definitionKey;
+ /**
+ * The IDs of references for this definition, for backrefs.
+ */
+ final List references = new ArrayList<>();
+
+ ReferencedDefinition(int definitionNumber, String definitionKey) {
+ this.definitionNumber = definitionNumber;
+ this.definitionKey = definitionKey;
+ }
+ }
+
+ private static class ReferenceInfo {
+ /**
+ * The ID of the reference; in the corresponding definition, a link back to this reference will be rendered.
+ */
+ private final String id;
+ /**
+ * The ID of the definition, for linking to the definition.
+ */
+ private final String definitionId;
+ /**
+ * The definition number, rendered in superscript.
+ */
+ private final int definitionNumber;
+
+ private ReferenceInfo(String id, String definitionId, int definitionNumber) {
+ this.id = id;
+ this.definitionId = definitionId;
+ this.definitionNumber = definitionNumber;
+ }
+ }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java
new file mode 100644
index 000000000..07b008576
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java
@@ -0,0 +1,57 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.ext.footnotes.FootnoteReference;
+import org.commonmark.ext.footnotes.InlineFootnote;
+import org.commonmark.node.LinkReferenceDefinition;
+import org.commonmark.parser.InlineParserContext;
+import org.commonmark.parser.beta.LinkInfo;
+import org.commonmark.parser.beta.LinkProcessor;
+import org.commonmark.parser.beta.LinkResult;
+import org.commonmark.parser.beta.Scanner;
+
+/**
+ * Link processor that recognises footnote syntax: {@code [^foo]} becomes a {@link FootnoteReference} and
+ * {@code ^[foo]} becomes an {@link InlineFootnote}.
+ */
+public class FootnoteLinkProcessor implements LinkProcessor {
+
+    @Override
+    public LinkResult process(LinkInfo linkInfo, Scanner scanner, InlineParserContext context) {
+        if (hasCaretMarker(linkInfo)) {
+            // Inline footnote such as ^[footnote text]. The marker only shows up here when the option is enabled on
+            // the extension.
+            return LinkResult.wrapTextIn(new InlineFootnote(), linkInfo.afterTextBracket()).includeMarker();
+        }
+
+        var footnoteLabel = referenceLabel(linkInfo, context);
+        if (footnoteLabel == null) {
+            return LinkResult.none();
+        }
+
+        // Only the text part of the link is consumed for footnotes, never the label part (if any). With a `![`
+        // marker the `!` is not included either, the replacement starts from the bracket.
+        return LinkResult.replaceWith(new FootnoteReference(footnoteLabel), linkInfo.afterTextBracket());
+    }
+
+    /**
+     * @return whether the link has a marker whose literal is {@code ^}
+     */
+    private static boolean hasCaretMarker(LinkInfo linkInfo) {
+        var marker = linkInfo.marker();
+        return marker != null && marker.getLiteral().equals("^");
+    }
+
+    /**
+     * @return the footnote label if the link should become a footnote reference, otherwise {@code null}
+     */
+    private static String referenceLabel(LinkInfo linkInfo, InlineParserContext context) {
+        // An inline link (one with a destination) can't be a footnote reference.
+        if (linkInfo.destination() != null) {
+            return null;
+        }
+
+        // A footnote reference needs to start with [^
+        var bracketText = linkInfo.text();
+        if (!bracketText.startsWith("^")) {
+            return null;
+        }
+
+        // A label after the text that has a link reference definition wins: in `[^foo][bar]` with `[bar]` defined,
+        // the whole thing is a link and `[^foo]` is not a footnote reference.
+        var linkLabel = linkInfo.label();
+        if (linkLabel != null && context.getDefinition(LinkReferenceDefinition.class, linkLabel) != null) {
+            return null;
+        }
+
+        // Without a matching definition the reference is ignored (same behavior as for link reference definitions).
+        // The label syntax was already checked by the definition parser, so it's not checked again here.
+        var candidate = bracketText.substring(1);
+        return context.getDefinition(FootnoteDefinition.class, candidate) != null ? candidate : null;
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java
new file mode 100644
index 000000000..3dcf4fc83
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java
@@ -0,0 +1,70 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.ext.footnotes.FootnoteReference;
+import org.commonmark.ext.footnotes.InlineFootnote;
+import org.commonmark.node.*;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+
+import java.util.Set;
+
+/**
+ * Renders the footnote nodes ({@link FootnoteReference}, {@link InlineFootnote} and
+ * {@link FootnoteDefinition}) back to Markdown source text.
+ */
+public class FootnoteMarkdownNodeRenderer implements NodeRenderer {
+
+    private final MarkdownWriter writer;
+    private final MarkdownNodeRendererContext context;
+
+    /**
+     * @param context the renderer context; its writer receives all output and it is used to render child nodes
+     */
+    public FootnoteMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+        this.context = context;
+        this.writer = context.getWriter();
+    }
+
+    /**
+     * @return the three footnote node types handled by {@link #render(Node)}
+     */
+    @Override
+    public Set<Class<? extends Node>> getNodeTypes() {
+        return Set.of(FootnoteReference.class, InlineFootnote.class, FootnoteDefinition.class);
+    }
+
+    /**
+     * Dispatches to the type-specific render method. Nodes of any other type are ignored.
+     */
+    @Override
+    public void render(Node node) {
+        if (node instanceof FootnoteReference) {
+            renderReference((FootnoteReference) node);
+            return;
+        }
+        if (node instanceof InlineFootnote) {
+            renderInline((InlineFootnote) node);
+            return;
+        }
+        if (node instanceof FootnoteDefinition) {
+            renderDefinition((FootnoteDefinition) node);
+        }
+    }
+
+    /** Writes a reference as {@code [^label]}. */
+    private void renderReference(FootnoteReference ref) {
+        writer.raw("[^");
+        // The label is parsed as-is without escaping, so we can render it back as-is
+        writer.raw(ref.getLabel());
+        writer.raw("]");
+    }
+
+    /** Writes an inline footnote as {@code ^[...]}, rendering its children between the brackets. */
+    private void renderInline(InlineFootnote inlineFootnote) {
+        writer.raw("^[");
+        renderChildren(inlineFootnote);
+        writer.raw("]");
+    }
+
+    /** Writes a definition as {@code [^label]: } followed by its children, rendered with a pushed prefix. */
+    private void renderDefinition(FootnoteDefinition def) {
+        writer.raw("[^");
+        writer.raw(def.getLabel());
+        writer.raw("]: ");
+
+        // NOTE(review): confirm this prefix width matches the indentation that the footnote definition
+        // parser requires for continuation lines.
+        writer.pushPrefix(" ");
+        renderChildren(def);
+        writer.popPrefix();
+    }
+
+    /** Renders every direct child of {@code parent} in order via the context. */
+    private void renderChildren(Node parent) {
+        Node child = parent.getFirstChild();
+        while (child != null) {
+            // Look up the sibling before rendering, in case rendering changes the child's links
+            Node following = child.getNext();
+            context.render(child);
+            child = following;
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/javadoc/overview.html b/commonmark-ext-footnotes/src/main/javadoc/overview.html
new file mode 100644
index 000000000..4f19d2115
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/javadoc/overview.html
@@ -0,0 +1,6 @@
+
+
+Extension for footnotes using [^1] syntax
+
See {@link org.commonmark.ext.footnotes.FootnotesExtension}
+
+
diff --git a/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java
new file mode 100644
index 000000000..bc7d4f74c
--- /dev/null
+++ b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java
@@ -0,0 +1,339 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.Extension;
+import org.commonmark.node.Document;
+import org.commonmark.node.Paragraph;
+import org.commonmark.node.Text;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.testutil.Asserts;
+import org.commonmark.testutil.RenderingTestCase;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Set;
+
+public class FootnoteHtmlRendererTest extends RenderingTestCase {
+ private static final Set EXTENSIONS = Set.of(FootnotesExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void testOne() {
+ assertRendering("Test [^foo]\n\n[^foo]: note\n",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testLabelNormalization() {
+ // Labels match via their normalized form. For the href and IDs to match, rendering needs to use the
+ // label from the definition consistently.
+ assertRendering("Test [^bar]\n\n[^BAR]: note\n",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testMultipleReferences() {
+ // Tests a few things:
+ // - Numbering is based on the reference order, not the definition order
+ // - The same number is used when a definition is referenced multiple times
+ // - Multiple backrefs are rendered
+ assertRendering("First [^foo]\n\nThen [^bar]\n\nThen [^foo] again\n\n[^bar]: b\n[^foo]: f\n",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testNestedFootnotesOrder() {
+ // GitHub has a strange result here, the definitions are in order: 1. bar, 2. foo.
+ // The reason is that the numbering is based on all references in document order, including references in
+ // definitions. So [^bar] from the first line is first.
+ assertRendering("[^foo]: foo [^bar]\n" +
+ "\n" +
+ "[^foo]\n" +
+ "\n" +
+ "[^bar]: bar\n", "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testNestedFootnotesUnreferenced() {
+ // This should not result in any footnotes, as foo itself isn't referenced.
+ // But GitHub renders bar only, with a broken backref, because bar is referenced from foo.
+ assertRendering("[^foo]: foo[^bar]\n" +
+ "[^bar]: bar\n", "");
+
+ // And here only 1 is rendered.
+ assertRendering("[^1]\n" +
+ "\n" +
+ "[^1]: one\n" +
+ "[^foo]: foo[^bar]\n" +
+ "[^bar]: bar\n", "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testInlineFootnoteWithReference() {
+ // This is a bit tricky because the IDs need to be unique.
+ assertRenderingInline("Test ^[inline [^1]]\n" +
+ "\n" +
+ "[^1]: normal",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+
+ @Test
+ public void testRenderNodesDirectly() {
+ // Everything should work as expected when rendering from nodes directly (no parsing step).
+ var doc = new Document();
+ var p = new Paragraph();
+ p.appendChild(new Text("Test "));
+ p.appendChild(new FootnoteReference("foo"));
+ var def = new FootnoteDefinition("foo");
+ var note = new Paragraph();
+ note.appendChild(new Text("note!"));
+ def.appendChild(note);
+ doc.appendChild(p);
+ doc.appendChild(def);
+
+ var expected = "
+````````````````````````````````
+
+## Alert content
+
+Marker alone in first paragraph, blank line, then content:
+
+```````````````````````````````` example alert
+> [!NOTE]
+>
+> Content
+.
+
+
Note
+
Content
+
+````````````````````````````````
+
+Multiple paragraphs:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First paragraph
+>
+> Second paragraph
+.
+
+
Note
+
First paragraph
+
Second paragraph
+
+````````````````````````````````
+
+Inline formatting:
+
+```````````````````````````````` example alert
+> [!TIP]
+> This is **bold** and *italic*
+.
+
+````````````````````````````````
+
+List inside alert:
+
+```````````````````````````````` example alert
+> [!IMPORTANT]
+> Items:
+> - First item
+> - Second item
+.
+
+
Important
+
Items:
+
+
First item
+
Second item
+
+
+````````````````````````````````
+
+Links inside alert:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> Check out [this link](https://example.com) for more info
+.
+
+````````````````````````````````
+
+Empty lines in middle of alert:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First
+>
+>
+> After empty lines
+.
+
+
Note
+
First
+
After empty lines
+
+````````````````````````````````
+
+## Not an alert
+
+Text after marker on the same line:
+
+```````````````````````````````` example alert
+> [!NOTE] Some text
+.
+
+
[!NOTE] Some text
+
+````````````````````````````````
+
+Unknown type:
+
+```````````````````````````````` example alert
+> [!INVALID]
+> Some text
+.
+
+
[!INVALID]
+Some text
+
+````````````````````````````````
+
+Unconfigured custom type is not an alert:
+
+```````````````````````````````` example alert
+> [!INFO]
+> Should be blockquote
+.
+
+
[!INFO]
+Should be blockquote
+
+````````````````````````````````
+
+Marker with no content:
+
+```````````````````````````````` example alert
+> [!NOTE]
+.
+
+
[!NOTE]
+
+````````````````````````````````
+
+Whitespace-only content after marker:
+
+```````````````````````````````` example alert
+> [!TIP]
+>
+>
+.
+
+
[!TIP]
+
+````````````````````````````````
+
+Extra space inside marker:
+
+```````````````````````````````` example alert
+> [! NOTE]
+> Should be blockquote
+.
+
+
[! NOTE]
+Should be blockquote
+
+````````````````````````````````
+
+Missing brackets:
+
+```````````````````````````````` example alert
+> !NOTE
+> Should be blockquote
+.
+
+
!NOTE
+Should be blockquote
+
+````````````````````````````````
+
+Missing exclamation mark:
+
+```````````````````````````````` example alert
+> [NOTE]
+> Should be blockquote
+.
+
+
[NOTE]
+Should be blockquote
+
+````````````````````````````````
+
+Regular blockquote is not affected:
+
+```````````````````````````````` example alert
+> This is a regular blockquote
+.
+
+
This is a regular blockquote
+
+````````````````````````````````
+
+## Boundaries
+
+Trailing spaces after marker:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+.
+
+
Note
+
This is a note
+
+````````````````````````````````
+
+Trailing tabs after marker:
+
+```````````````````````````````` example alert
+> [!WARNING]→→
+> Be careful
+.
+
+
Warning
+
Be careful
+
+````````````````````````````````
+
+Leading spaces before blockquote marker:
+
+```````````````````````````````` example alert
+ > [!IMPORTANT]
+ > Content
+.
+
+
Important
+
Content
+
+````````````````````````````````
+
+Blank line after marker ends the blockquote (not an alert):
+
+```````````````````````````````` example alert
+> [!NOTE]
+
+Some text
+.
+
+
[!NOTE]
+
+
Some text
+````````````````````````````````
+
+Alert followed by blockquote:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is an alert
+
+> This is a blockquote
+.
+
+
Note
+
This is an alert
+
+
+
This is a blockquote
+
+````````````````````````````````
+
+Adjacent alerts:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First alert
+
+> [!WARNING]
+> Second alert
+.
+
+
Note
+
First alert
+
+
+
Warning
+
Second alert
+
+````````````````````````````````
+
+## Nesting and containers
+
+Nested alert inside alert renders as blockquote:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+>> [!WARNING]
+>> Nested content
+.
+
+
Note
+
This is a note
+
+
[!WARNING]
+Nested content
+
+
+````````````````````````````````
+
+Nested blockquote inside alert:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+>> Nested blockquote
+.
+
+
Note
+
This is a note
+
+
Nested blockquote
+
+
+````````````````````````````````
+
+Alert inside list item stays as blockquote:
+
+```````````````````````````````` example alert
+- > [!NOTE]
+ > Test
+.
+
+
+
+
[!NOTE]
+Test
+
+
+
+````````````````````````````````
+
+Alert marker in content is treated as text:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+> [!WARNING]
+> This is still part of the note
+.
+
+
Note
+
This is a note
+[!WARNING]
+This is still part of the note
+
+````````````````````````````````
+
+## Continuation and interruption
+
+Lazy continuation:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First line
+Lazy continuation
+> Continues alert
+.
+
+
Note
+
First line
+Lazy continuation
+Continues alert
+
+````````````````````````````````
+
+Alert type after regular blockquote content is not an alert:
+
+```````````````````````````````` example alert
+> Regular blockquote
+> [!NOTE]
+> More text
+.
+
+
Regular blockquote
+[!NOTE]
+More text
+
+````````````````````````````````
diff --git a/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java b/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java
new file mode 100644
index 000000000..06192f107
--- /dev/null
+++ b/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java
@@ -0,0 +1,111 @@
+///usr/bin/env jbang "$0" "$@" ; exit $?
+
+// Generates alerts-spec.txt from alerts-spec-template.md by rendering each example
+// through the GitHub Markdown API and inserting the normalized HTML expectation.
+//
+// Prerequisites: gh CLI installed and authenticated (gh auth login)
+// Usage: cd commonmark-ext-gfm-alerts/src/test/resources && jbang generate-alerts-spec.java
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+class GenerateAlertsSpec {
+
+ private static final String FENCE = "````````````````````````````````";
+ private static final String EXAMPLE_OPEN = FENCE + " example alert";
+
+ /**
+  * Generates {@code alerts-spec.txt} from {@code alerts-spec-template.md} in the current working directory.
+  * <p>
+  * Every line of the template is copied to the output. A header is inserted after the first line if it is a
+  * top-level heading, and for each example block the source is rendered via {@link #renderViaGh(String)},
+  * normalized via {@link #normalizeHtml(String)} and appended after a {@code .} separator line.
+  * <p>
+  * Exits with status 1 if the template file does not exist.
+  *
+  * @param args unused
+  * @throws IllegalStateException if an example block has no closing fence
+  * @throws Exception if reading, rendering or writing fails
+  */
+ public static void main(String[] args) throws Exception {
+     var templatePath = Path.of("alerts-spec-template.md");
+     if (!Files.exists(templatePath)) {
+         System.err.println("Run from the directory containing alerts-spec-template.md");
+         System.exit(1);
+     }
+
+     var header = "Expectations verified against GitHub Markdown API (gh api markdown -f mode=gfm).\n" +
+             "Our HTML omits GitHub's SVG icons and uses a `data-alert-type` attribute instead.";
+     var lines = Files.readAllLines(templatePath);
+     var output = new ArrayList<String>();
+     int exampleCount = 0;
+
+     for (int i = 0; i < lines.size(); i++) {
+         var line = lines.get(i);
+         // Every template line is carried over; headings and example openers get extra lines after them
+         output.add(line);
+
+         if (i == 0 && line.startsWith("# ")) {
+             // Insert header after the first heading
+             output.add("");
+             output.add(header);
+             continue;
+         }
+         if (!line.equals(EXAMPLE_OPEN)) {
+             continue;
+         }
+
+         // Collect source lines until closing fence
+         int openingLine = i + 1;
+         var sourceLines = new ArrayList<String>();
+         for (i++; i < lines.size() && !lines.get(i).equals(FENCE); i++) {
+             sourceLines.add(lines.get(i));
+             output.add(lines.get(i));
+         }
+         if (i >= lines.size()) {
+             // Fail before calling the API rather than silently closing a malformed example
+             throw new IllegalStateException("Unterminated example block starting at line " + openingLine
+                     + " of alerts-spec-template.md");
+         }
+
+         // Render via GitHub API (→ represents tabs in the spec format)
+         var source = String.join("\n", sourceLines).replace("\u2192", "\t");
+         exampleCount++;
+         System.out.printf("%d: %s%n", exampleCount,
+                 source.substring(0, Math.min(40, source.length())).replace("\n", "\\n"));
+
+         var ghHtml = normalizeHtml(renderViaGh(source));
+
+         // Insert separator and HTML expectation
+         output.add(".");
+         output.add(ghHtml);
+         output.add(FENCE);
+         // i points at the template's closing fence here; the loop increment skips it
+     }
+
+     var specPath = Path.of("alerts-spec.txt");
+     Files.writeString(specPath, String.join("\n", output) + "\n");
+     System.out.println("Done — " + exampleCount + " examples written to alerts-spec.txt");
+ }
+
+ /**
+  * Renders Markdown by running {@code gh api markdown} in {@code gfm} mode and returns the command's output.
+  *
+  * @param markdown the Markdown source, passed to the command as the {@code text} field
+  * @return everything the command printed
+  * @throws RuntimeException if the command exits with a non-zero status; the message contains its output
+  * @throws Exception if the process cannot be started or waiting for it is interrupted
+  */
+ static String renderViaGh(String markdown) throws Exception {
+     var process = new ProcessBuilder("gh", "api", "markdown", "-f", "mode=gfm", "-f", "text=" + markdown)
+             // stderr is merged into stdout, so the failure message below carries the command's diagnostics
+             .redirectErrorStream(true)
+             .start();
+     // Decode explicitly as UTF-8: the platform default charset is not UTF-8 on every JDK before 18,
+     // which would corrupt non-ASCII characters in the rendered HTML.
+     var output = new String(process.getInputStream().readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
+     if (process.waitFor() != 0) {
+         throw new RuntimeException("gh api failed: " + output);
+     }
+     return output;
+ }
+
+ // Normalizes HTML from the GitHub API for comparison with our renderer output; returns the text with \n line ends, stripped.
+ static String normalizeHtml(String html) {
+ // Strip GitHub-specific elements and attributes. NOTE(review): the first pattern is empty here, making it a no-op - confirm.
+ html = Pattern.compile("", Pattern.DOTALL).matcher(html).replaceAll("");
+ html = html.replaceAll(" (dir=\"auto\"|rel=\"nofollow\"|class=\"notranslate\")", "");
+ // Add data-alert-type and insert newlines to match our renderer's formatting. NOTE(review): the last pattern below looks truncated.
+ html = Pattern.compile("class=\"markdown-alert markdown-alert-(\\w+)\"")
+ .matcher(html)
+ .replaceAll("class=\"markdown-alert markdown-alert-$1\" data-alert-type=\"$1\"");
+ html = Pattern.compile("(data-alert-type=\"\\w+\">)(
", "
\n
");
+ return html.replace("\r\n", "\n").lines()
+ .map(String::stripTrailing)
+ .reduce((a, b) -> a + "\n" + b)
+ .orElse("")
+ .strip();
+ }
+}
\ No newline at end of file
diff --git a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs
deleted file mode 100644
index 5a0ad22d2..000000000
--- a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs
+++ /dev/null
@@ -1,2 +0,0 @@
-eclipse.preferences.version=1
-line.separator=\n
diff --git a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 3c0d27c8f..000000000
--- a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,290 +0,0 @@
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
-org.eclipse.jdt.core.compiler.compliance=1.7
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.7
-org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_assignment=0
-org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
-org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
-org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
-org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
-org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
-org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16
-org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_after_package=1
-org.eclipse.jdt.core.formatter.blank_lines_before_field=0
-org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
-org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
-org.eclipse.jdt.core.formatter.blank_lines_before_method=1
-org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
-org.eclipse.jdt.core.formatter.blank_lines_before_package=0
-org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
-org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
-org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
-org.eclipse.jdt.core.formatter.comment.format_block_comments=true
-org.eclipse.jdt.core.formatter.comment.format_header=false
-org.eclipse.jdt.core.formatter.comment.format_html=true
-org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
-org.eclipse.jdt.core.formatter.comment.format_line_comments=true
-org.eclipse.jdt.core.formatter.comment.format_source_code=true
-org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
-org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
-org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
-org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
-org.eclipse.jdt.core.formatter.comment.line_length=120
-org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
-org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
-org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false
-org.eclipse.jdt.core.formatter.compact_else_if=true
-org.eclipse.jdt.core.formatter.continuation_indentation=2
-org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
-org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off
-org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on
-org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
-org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
-org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_empty_lines=false
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
-org.eclipse.jdt.core.formatter.indentation.size=4
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
-org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert
-org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
-org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.join_lines_in_comments=true
-org.eclipse.jdt.core.formatter.join_wrapped_lines=false
-org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.lineSplit=120
-org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
-org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
-org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
-org.eclipse.jdt.core.formatter.tabulation.char=space
-org.eclipse.jdt.core.formatter.tabulation.size=4
-org.eclipse.jdt.core.formatter.use_on_off_tags=false
-org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
-org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
-org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true
-org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true
-org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter
diff --git a/commonmark-ext-gfm-strikethrough/pom.xml b/commonmark-ext-gfm-strikethrough/pom.xml
index 632d6b658..9d8f55e5f 100644
--- a/commonmark-ext-gfm-strikethrough/pom.xml
+++ b/commonmark-ext-gfm-strikethrough/pom.xml
@@ -4,7 +4,7 @@
org.commonmarkcommonmark-parent
- 0.17.3-SNAPSHOT
+ 0.28.1-SNAPSHOTcommonmark-ext-gfm-strikethrough
@@ -24,20 +24,4 @@
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- org.commonmark.ext.gfm.strikethrough
-
-
-
-
-
-
-
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java b/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java
new file mode 100644
index 000000000..b6204934b
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+module org.commonmark.ext.gfm.strikethrough {
+ exports org.commonmark.ext.gfm.strikethrough;
+
+ requires transitive org.commonmark;
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java
index 115ae9ea4..0c24642bc 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java
@@ -4,19 +4,23 @@
import org.commonmark.node.Delimited;
/**
- * A strikethrough node containing text and other inline nodes nodes as children.
+ * A strikethrough node containing text and other inline nodes as children.
*/
public class Strikethrough extends CustomNode implements Delimited {
- private static final String DELIMITER = "~~";
+ private String delimiter;
+
+ public Strikethrough(String delimiter) {
+ this.delimiter = delimiter;
+ }
@Override
public String getOpeningDelimiter() {
- return DELIMITER;
+ return delimiter;
}
@Override
public String getClosingDelimiter() {
- return DELIMITER;
+ return delimiter;
}
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java
index 3d0839f11..364205aed 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java
@@ -1,42 +1,78 @@
package org.commonmark.ext.gfm.strikethrough;
import org.commonmark.Extension;
-import org.commonmark.renderer.text.TextContentRenderer;
-import org.commonmark.renderer.text.TextContentNodeRendererContext;
-import org.commonmark.renderer.text.TextContentNodeRendererFactory;
import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughDelimiterProcessor;
import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughHtmlNodeRenderer;
+import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughMarkdownNodeRenderer;
import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughTextContentNodeRenderer;
-import org.commonmark.renderer.html.HtmlRenderer;
-import org.commonmark.renderer.html.HtmlNodeRendererContext;
-import org.commonmark.renderer.html.HtmlNodeRendererFactory;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlNodeRendererFactory;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.commonmark.renderer.text.TextContentNodeRendererContext;
+import org.commonmark.renderer.text.TextContentNodeRendererFactory;
+import org.commonmark.renderer.text.TextContentRenderer;
+
+import java.util.Set;
/**
- * Extension for GFM strikethrough using ~~ (GitHub Flavored Markdown).
+ * Extension for GFM strikethrough using {@code ~} or {@code ~~} (GitHub Flavored Markdown).
+ *
Example input:
+ *
{@code ~foo~ or ~~bar~~}
+ *
Example output (HTML):
+ *
{@code foo or bar}
*
- * Create it with {@link #create()} and then configure it on the builders
+ * Create the extension with {@link #create()} and then add it to the parser and renderer builders
* ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)},
* {@link HtmlRenderer.Builder#extensions(Iterable)}).
*
*
* The parsed strikethrough text regions are turned into {@link Strikethrough} nodes.
*
+ *
+ * If you have another extension that only uses a single tilde ({@code ~}) syntax, you will have to configure this
+ * {@link StrikethroughExtension} to only accept the double tilde syntax, like this:
+ *
+ * If you don't do that, there's a conflict between the two extensions and you will get an
+ * {@link IllegalArgumentException} when constructing the parser.
+ *
*/
public class StrikethroughExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension,
- TextContentRenderer.TextContentRendererExtension {
+ TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension {
- private StrikethroughExtension() {
+ private final boolean requireTwoTildes;
+
+ private StrikethroughExtension(Builder builder) {
+ this.requireTwoTildes = builder.requireTwoTildes;
}
+ /**
+ * @return the extension with default options
+ */
public static Extension create() {
- return new StrikethroughExtension();
+ return builder().build();
+ }
+
+ /**
+ * @return a builder to configure the behavior of the extension
+ */
+ public static Builder builder() {
+ return new Builder();
}
@Override
public void extend(Parser.Builder parserBuilder) {
- parserBuilder.customDelimiterProcessor(new StrikethroughDelimiterProcessor());
+ parserBuilder.customDelimiterProcessor(new StrikethroughDelimiterProcessor(requireTwoTildes));
}
@Override
@@ -58,4 +94,41 @@ public NodeRenderer create(TextContentNodeRendererContext context) {
}
});
}
+
+ @Override
+ public void extend(MarkdownRenderer.Builder rendererBuilder) {
+ rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(MarkdownNodeRendererContext context) {
+ return new StrikethroughMarkdownNodeRenderer(context);
+ }
+
+ @Override
+ public Set getSpecialCharacters() {
+ return Set.of('~');
+ }
+ });
+ }
+
+ public static class Builder {
+
+ private boolean requireTwoTildes = false;
+
+ /**
+ * @param requireTwoTildes Whether two tilde characters ({@code ~~}) are required for strikethrough or whether
+ * one is also enough. Default is {@code false}; both a single tilde and two tildes can be used for strikethrough.
+ * @return {@code this}
+ */
+ public Builder requireTwoTildes(boolean requireTwoTildes) {
+ this.requireTwoTildes = requireTwoTildes;
+ return this;
+ }
+
+ /**
+ * @return a configured extension
+ */
+ public Extension build() {
+ return new StrikethroughExtension(this);
+ }
+ }
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java
index 7d54eedf2..4657106ab 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java
@@ -10,6 +10,16 @@
public class StrikethroughDelimiterProcessor implements DelimiterProcessor {
+ private final boolean requireTwoTildes;
+
+ public StrikethroughDelimiterProcessor() {
+ this(false);
+ }
+
+ public StrikethroughDelimiterProcessor(boolean requireTwoTildes) {
+ this.requireTwoTildes = requireTwoTildes;
+ }
+
@Override
public char getOpeningCharacter() {
return '~';
@@ -22,33 +32,34 @@ public char getClosingCharacter() {
@Override
public int getMinLength() {
- return 2;
+ return requireTwoTildes ? 2 : 1;
}
@Override
public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
- if (openingRun.length() >= 2 && closingRun.length() >= 2) {
- // Use exactly two delimiters even if we have more, and don't care about internal openers/closers.
+ if (openingRun.length() == closingRun.length() && openingRun.length() <= 2) {
+ // GitHub only accepts either one or two delimiters, but not a mix or more than that.
Text opener = openingRun.getOpener();
// Wrap nodes between delimiters in strikethrough.
- Node strikethrough = new Strikethrough();
+ String delimiter = openingRun.length() == 1 ? opener.getLiteral() : opener.getLiteral() + opener.getLiteral();
+ Node strikethrough = new Strikethrough(delimiter);
SourceSpans sourceSpans = new SourceSpans();
- sourceSpans.addAllFrom(openingRun.getOpeners(2));
+ sourceSpans.addAllFrom(openingRun.getOpeners(openingRun.length()));
for (Node node : Nodes.between(opener, closingRun.getCloser())) {
strikethrough.appendChild(node);
sourceSpans.addAll(node.getSourceSpans());
}
- sourceSpans.addAllFrom(closingRun.getClosers(2));
+ sourceSpans.addAllFrom(closingRun.getClosers(closingRun.length()));
strikethrough.setSourceSpans(sourceSpans.getSourceSpans());
opener.insertAfter(strikethrough);
- return 2;
+ return openingRun.length();
} else {
return 0;
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java
index 4dd0de39b..b1a82cb03 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java
@@ -1,10 +1,9 @@
package org.commonmark.ext.gfm.strikethrough.internal;
-import org.commonmark.renderer.html.HtmlWriter;
-import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.node.Node;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlWriter;
-import java.util.Collections;
import java.util.Map;
public class StrikethroughHtmlNodeRenderer extends StrikethroughNodeRenderer {
@@ -19,7 +18,7 @@ public StrikethroughHtmlNodeRenderer(HtmlNodeRendererContext context) {
@Override
public void render(Node node) {
- Map attributes = context.extendAttributes(node, "del", Collections.emptyMap());
+ Map attributes = context.extendAttributes(node, "del", Map.of());
html.tag("del", attributes);
renderChildren(node);
html.tag("/del");
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java
new file mode 100644
index 000000000..1c91dd64f
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java
@@ -0,0 +1,34 @@
+package org.commonmark.ext.gfm.strikethrough.internal;
+
+import org.commonmark.ext.gfm.strikethrough.Strikethrough;
+import org.commonmark.node.Node;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+
+public class StrikethroughMarkdownNodeRenderer extends StrikethroughNodeRenderer {
+
+ private final MarkdownNodeRendererContext context;
+ private final MarkdownWriter writer;
+
+ public StrikethroughMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+ this.context = context;
+ this.writer = context.getWriter();
+ }
+
+ @Override
+ public void render(Node node) {
+ Strikethrough strikethrough = (Strikethrough) node;
+ writer.raw(strikethrough.getOpeningDelimiter());
+ renderChildren(node);
+ writer.raw(strikethrough.getClosingDelimiter());
+ }
+
+ private void renderChildren(Node parent) {
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext();
+ context.render(node);
+ node = next;
+ }
+ }
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java
index 4f3a12618..18ed21887 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java
@@ -4,13 +4,12 @@
import org.commonmark.node.Node;
import org.commonmark.renderer.NodeRenderer;
-import java.util.Collections;
import java.util.Set;
abstract class StrikethroughNodeRenderer implements NodeRenderer {
@Override
public Set> getNodeTypes() {
- return Collections.>singleton(Strikethrough.class);
+ return Set.of(Strikethrough.class);
}
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java
new file mode 100644
index 000000000..c497a4db3
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java
@@ -0,0 +1,35 @@
+package org.commonmark.ext.gfm.strikethrough;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class StrikethroughMarkdownRendererTest {
+
+ private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void testStrikethrough() {
+ assertRoundTrip("~foo~ ~bar~\n");
+ assertRoundTrip("~~foo~~ ~~bar~~\n");
+ assertRoundTrip("~~f\\~oo~~ ~~bar~~\n");
+
+ assertRoundTrip("\\~foo\\~\n");
+ }
+
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+
+ private void assertRoundTrip(String input) {
+ String rendered = render(input);
+ assertThat(rendered).isEqualTo(input);
+ }
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java
new file mode 100644
index 000000000..f1199b521
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java
@@ -0,0 +1,42 @@
+package org.commonmark.ext.gfm.strikethrough;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.testutil.RenderingTestCase;
+import org.commonmark.testutil.TestResources;
+import org.commonmark.testutil.example.Example;
+import org.commonmark.testutil.example.ExampleReader;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.Parameter;
+import org.junit.jupiter.params.ParameterizedClass;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.List;
+import java.util.Set;
+
+@ParameterizedClass
+@MethodSource("data")
+public class StrikethroughSpecTest extends RenderingTestCase {
+
+ private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Parameter
+ Example example;
+
+ static List data() {
+ return ExampleReader.readExamples(TestResources.getGfmSpec(), "strikethrough");
+ }
+
+ @Test
+ public void testHtmlRendering() {
+ assertRendering(example.getSource(), example.getHtml());
+ }
+
+ @Override
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java
index e2e3b95c4..c29391cdd 100644
--- a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java
+++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java
@@ -4,34 +4,36 @@
import org.commonmark.node.Node;
import org.commonmark.node.Paragraph;
import org.commonmark.node.SourceSpan;
+import org.commonmark.node.Text;
import org.commonmark.parser.IncludeSourceSpans;
import org.commonmark.parser.Parser;
+import org.commonmark.parser.delimiter.DelimiterProcessor;
+import org.commonmark.parser.delimiter.DelimiterRun;
import org.commonmark.renderer.html.HtmlRenderer;
import org.commonmark.renderer.text.TextContentRenderer;
import org.commonmark.testutil.RenderingTestCase;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.util.Arrays;
-import java.util.Collections;
+import java.util.List;
import java.util.Set;
-import static org.junit.Assert.assertEquals;
+import static org.assertj.core.api.Assertions.assertThat;
public class StrikethroughTest extends RenderingTestCase {
- private static final Set EXTENSIONS = Collections.singleton(StrikethroughExtension.create());
+ private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create());
private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
private static final HtmlRenderer HTML_RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
private static final TextContentRenderer CONTENT_RENDERER = TextContentRenderer.builder()
.extensions(EXTENSIONS).build();
@Test
- public void oneTildeIsNotEnough() {
- assertRendering("~foo~", "
~foo~
\n");
+ public void oneTildeIsEnough() {
+ assertRendering("~foo~", "
foo
\n");
}
@Test
- public void twoTildesYay() {
+ public void twoTildesWorksToo() {
assertRendering("~~foo~~", "
foo
\n");
}
@@ -48,23 +50,22 @@ public void unmatched() {
@Test
public void threeInnerThree() {
- assertRendering("a ~~~foo~~~", "
a ~foo~
\n");
+ assertRendering("a ~~~foo~~~", "
a ~~~foo~~~
\n");
}
@Test
public void twoInnerThree() {
- assertRendering("~~foo~~~", "
foo~
\n");
+ assertRendering("~~foo~~~", "
~~foo~~~
\n");
}
@Test
public void tildesInside() {
assertRendering("~~foo~bar~~", "
foo~bar
\n");
assertRendering("~~foo~~bar~~", "
foobar~~
\n");
- assertRendering("~~foo~~~bar~~", "
foo~bar~~
\n");
- assertRendering("~~foo~~~~bar~~", "
foobar
\n");
- assertRendering("~~foo~~~~~bar~~", "
foo~bar
\n");
- assertRendering("~~foo~~~~~~bar~~", "
foo~~bar
\n");
- assertRendering("~~foo~~~~~~~bar~~", "
foo~~~bar
\n");
+ assertRendering("~~foo~~~bar~~", "
foo~~~bar
\n");
+ assertRendering("~~foo~~~~bar~~", "
foo~~~~bar
\n");
+ assertRendering("~~foo~~~~~bar~~", "
foo~~~~~bar
\n");
+ assertRendering("~~foo~~~~~~bar~~", "
foo~~~~~~bar
\n");
}
@Test
@@ -83,14 +84,27 @@ public void insideBlockQuote() {
public void delimited() {
Node document = PARSER.parse("~~foo~~");
Strikethrough strikethrough = (Strikethrough) document.getFirstChild().getFirstChild();
- assertEquals("~~", strikethrough.getOpeningDelimiter());
- assertEquals("~~", strikethrough.getClosingDelimiter());
+ assertThat(strikethrough.getOpeningDelimiter()).isEqualTo("~~");
+ assertThat(strikethrough.getClosingDelimiter()).isEqualTo("~~");
}
@Test
public void textContentRenderer() {
Node document = PARSER.parse("~~foo~~");
- assertEquals("/foo/", CONTENT_RENDERER.render(document));
+ assertThat(CONTENT_RENDERER.render(document)).isEqualTo("/foo/");
+ }
+
+ @Test
+ public void requireTwoTildesOption() {
+ Parser parser = Parser.builder()
+ .extensions(Set.of(StrikethroughExtension.builder()
+ .requireTwoTildes(true)
+ .build()))
+ .customDelimiterProcessor(new SubscriptDelimiterProcessor())
+ .build();
+
+ Node document = parser.parse("~foo~ ~~bar~~");
+ assertThat(CONTENT_RENDERER.render(document)).isEqualTo("(sub)foo(/sub) /bar/");
}
@Test
@@ -103,12 +117,36 @@ public void sourceSpans() {
Node document = parser.parse("hey ~~there~~\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node strikethrough = block.getLastChild();
- assertEquals(Arrays.asList(SourceSpan.of(0, 4, 9)),
- strikethrough.getSourceSpans());
+ assertThat(strikethrough.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 9)));
}
@Override
protected String render(String source) {
return HTML_RENDERER.render(PARSER.parse(source));
}
+
+ private static class SubscriptDelimiterProcessor implements DelimiterProcessor {
+
+ @Override
+ public char getOpeningCharacter() {
+ return '~';
+ }
+
+ @Override
+ public char getClosingCharacter() {
+ return '~';
+ }
+
+ @Override
+ public int getMinLength() {
+ return 1;
+ }
+
+ @Override
+ public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
+ openingRun.getOpener().insertAfter(new Text("(sub)"));
+ closingRun.getCloser().insertBefore(new Text("(/sub)"));
+ return 1;
+ }
+ }
}
diff --git a/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs
deleted file mode 100644
index 5a0ad22d2..000000000
--- a/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs
+++ /dev/null
@@ -1,2 +0,0 @@
-eclipse.preferences.version=1
-line.separator=\n
diff --git a/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 3c0d27c8f..000000000
--- a/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,290 +0,0 @@
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
-org.eclipse.jdt.core.compiler.compliance=1.7
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.7
-org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_assignment=0
-org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
-org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
-org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
-org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
-org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
-org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16
-org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_after_package=1
-org.eclipse.jdt.core.formatter.blank_lines_before_field=0
-org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
-org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
-org.eclipse.jdt.core.formatter.blank_lines_before_method=1
-org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
-org.eclipse.jdt.core.formatter.blank_lines_before_package=0
-org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
-org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
-org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
-org.eclipse.jdt.core.formatter.comment.format_block_comments=true
-org.eclipse.jdt.core.formatter.comment.format_header=false
-org.eclipse.jdt.core.formatter.comment.format_html=true
-org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
-org.eclipse.jdt.core.formatter.comment.format_line_comments=true
-org.eclipse.jdt.core.formatter.comment.format_source_code=true
-org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
-org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
-org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
-org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
-org.eclipse.jdt.core.formatter.comment.line_length=120
-org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
-org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
-org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false
-org.eclipse.jdt.core.formatter.compact_else_if=true
-org.eclipse.jdt.core.formatter.continuation_indentation=2
-org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
-org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off
-org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on
-org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
-org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
-org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_empty_lines=false
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
-org.eclipse.jdt.core.formatter.indentation.size=4
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
-org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert
-org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
-org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.join_lines_in_comments=true
-org.eclipse.jdt.core.formatter.join_wrapped_lines=false
-org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.lineSplit=120
-org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
-org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
-org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
-org.eclipse.jdt.core.formatter.tabulation.char=space
-org.eclipse.jdt.core.formatter.tabulation.size=4
-org.eclipse.jdt.core.formatter.use_on_off_tags=false
-org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
-org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
-org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true
-org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true
-org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter
diff --git a/commonmark-ext-gfm-tables/pom.xml b/commonmark-ext-gfm-tables/pom.xml
index 4b5972d14..5bd323168 100644
--- a/commonmark-ext-gfm-tables/pom.xml
+++ b/commonmark-ext-gfm-tables/pom.xml
@@ -4,7 +4,7 @@
org.commonmarkcommonmark-parent
- 0.17.3-SNAPSHOT
+ 0.28.1-SNAPSHOTcommonmark-ext-gfm-tables
@@ -24,20 +24,4 @@
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- org.commonmark.ext.gfm.tables
-
-
-
-
-
-
-
diff --git a/commonmark-ext-gfm-tables/src/main/java/module-info.java b/commonmark-ext-gfm-tables/src/main/java/module-info.java
new file mode 100644
index 000000000..7e6d2629c
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+module org.commonmark.ext.gfm.tables {
+ exports org.commonmark.ext.gfm.tables;
+
+ requires transitive org.commonmark;
+}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java
index 61880c6c3..033c2dd04 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java
@@ -9,6 +9,7 @@ public class TableCell extends CustomNode {
private boolean header;
private Alignment alignment;
+ private int width;
/**
* @return whether the cell is a header or not
@@ -22,7 +23,7 @@ public void setHeader(boolean header) {
}
/**
- * @return the cell alignment
+ * @return the cell alignment or {@code null} if no specific alignment
*/
public Alignment getAlignment() {
return alignment;
@@ -32,6 +33,17 @@ public void setAlignment(Alignment alignment) {
this.alignment = alignment;
}
+ /**
+ * @return the cell width (the number of dash and colon characters in the delimiter row of the table for this column)
+ */
+ public int getWidth() {
+ return width;
+ }
+
+ public void setWidth(int width) {
+ this.width = width;
+ }
+
/**
* How the cell is aligned horizontally.
*/
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java
index 5707b0f14..f754b8276 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java
@@ -3,16 +3,22 @@
import org.commonmark.Extension;
import org.commonmark.ext.gfm.tables.internal.TableBlockParser;
import org.commonmark.ext.gfm.tables.internal.TableHtmlNodeRenderer;
+import org.commonmark.ext.gfm.tables.internal.TableMarkdownNodeRenderer;
import org.commonmark.ext.gfm.tables.internal.TableTextContentNodeRenderer;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.NodeRenderer;
import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.renderer.html.HtmlNodeRendererFactory;
import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
import org.commonmark.renderer.text.TextContentNodeRendererContext;
import org.commonmark.renderer.text.TextContentNodeRendererFactory;
import org.commonmark.renderer.text.TextContentRenderer;
+import java.util.Set;
+
/**
* Extension for GFM tables using "|" pipes (GitHub Flavored Markdown).
*
@@ -27,7 +33,7 @@
* @see Tables (extension) in GitHub Flavored Markdown Spec
*/
public class TablesExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension,
- TextContentRenderer.TextContentRendererExtension {
+ TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension {
private TablesExtension() {
}
@@ -60,4 +66,19 @@ public NodeRenderer create(TextContentNodeRendererContext context) {
}
});
}
+
+ @Override
+ public void extend(MarkdownRenderer.Builder rendererBuilder) {
+ rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(MarkdownNodeRendererContext context) {
+ return new TableMarkdownNodeRenderer(context);
+ }
+
+ @Override
+ public Set getSpecialCharacters() {
+ return Set.of('|');
+ }
+ });
+ }
}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java
index a8eedb7a6..57af128d8 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java
@@ -1,7 +1,6 @@
package org.commonmark.ext.gfm.tables.internal;
import org.commonmark.ext.gfm.tables.*;
-import org.commonmark.internal.util.Parsing;
import org.commonmark.node.Block;
import org.commonmark.node.Node;
import org.commonmark.node.SourceSpan;
@@ -9,6 +8,7 @@
import org.commonmark.parser.SourceLine;
import org.commonmark.parser.SourceLines;
import org.commonmark.parser.block.*;
+import org.commonmark.text.Characters;
import java.util.ArrayList;
import java.util.List;
@@ -17,16 +17,18 @@ public class TableBlockParser extends AbstractBlockParser {
private final TableBlock block = new TableBlock();
private final List rowLines = new ArrayList<>();
- private final List columns;
+ private final List columns;
- private TableBlockParser(List columns, SourceLine headerLine) {
+ private boolean canHaveLazyContinuationLines = true;
+
+ private TableBlockParser(List columns, SourceLine headerLine) {
this.columns = columns;
this.rowLines.add(headerLine);
}
@Override
public boolean canHaveLazyContinuationLines() {
- return true;
+ return canHaveLazyContinuationLines;
}
@Override
@@ -36,7 +38,17 @@ public Block getBlock() {
@Override
public BlockContinue tryContinue(ParserState state) {
- if (Parsing.find('|', state.getLine().getContent(), 0) != -1) {
+ CharSequence content = state.getLine().getContent();
+ int pipe = Characters.find('|', content, state.getNextNonSpaceIndex());
+ if (pipe != -1) {
+ if (pipe == state.getNextNonSpaceIndex()) {
+ // If we *only* have a pipe character (and whitespace), that is not a valid table row and ends the table.
+ if (Characters.skipSpaceTab(content, pipe + 1, content.length()) == content.length()) {
+ // We also don't want the pipe to be added via lazy continuation.
+ canHaveLazyContinuationLines = false;
+ return BlockContinue.none();
+ }
+ }
return BlockContinue.atIndex(state.getIndex());
} else {
return BlockContinue.none();
@@ -108,12 +120,14 @@ private TableCell parseCell(SourceLine cell, int column, InlineParser inlinePars
}
if (column < columns.size()) {
- tableCell.setAlignment(columns.get(column));
+ TableCellInfo cellInfo = columns.get(column);
+ tableCell.setAlignment(cellInfo.getAlignment());
+ tableCell.setWidth(cellInfo.getWidth());
}
CharSequence content = cell.getContent();
- int start = Parsing.skipSpaceTab(content, 0, content.length());
- int end = Parsing.skipSpaceTabBackwards(content, content.length() - 1, start);
+ int start = Characters.skipSpaceTab(content, 0, content.length());
+ int end = Characters.skipSpaceTabBackwards(content, content.length() - 1, start);
inlineParser.parse(SourceLines.of(cell.substring(start, end + 1)), tableCell);
return tableCell;
@@ -121,15 +135,23 @@ private TableCell parseCell(SourceLine cell, int column, InlineParser inlinePars
private static List split(SourceLine line) {
CharSequence row = line.getContent();
- int nonSpace = Parsing.skipSpaceTab(row, 0, row.length());
- int cellStart = row.charAt(nonSpace) == '|' ? nonSpace + 1 : nonSpace;
+ int nonSpace = Characters.skipSpaceTab(row, 0, row.length());
+ int cellStart = nonSpace;
+ int cellEnd = row.length();
+ if (row.charAt(nonSpace) == '|') {
+ // This row has leading/trailing pipes - skip the leading pipe
+ cellStart = nonSpace + 1;
+ // Strip whitespace from the end but not the pipe or we could miss an empty ("||") cell
+ int nonSpaceEnd = Characters.skipSpaceTabBackwards(row, row.length() - 1, cellStart);
+ cellEnd = nonSpaceEnd + 1;
+ }
List cells = new ArrayList<>();
StringBuilder sb = new StringBuilder();
- for (int i = cellStart; i < row.length(); i++) {
+ for (int i = cellStart; i < cellEnd; i++) {
char c = row.charAt(i);
switch (c) {
case '\\':
- if (i + 1 < row.length() && row.charAt(i + 1) == '|') {
+ if (i + 1 < cellEnd && row.charAt(i + 1) == '|') {
// Pipe is special for table parsing. An escaped pipe doesn't result in a new cell, but is
// passed down to inline parsing as an unescaped pipe. Note that that applies even for the `\|`
// in an input like `\\|` - in other words, table parsing doesn't support escaping backslashes.
@@ -167,11 +189,12 @@ private static List split(SourceLine line) {
// -|-
// |-|-|
// --- | ---
- private static List parseSeparator(CharSequence s) {
- List columns = new ArrayList<>();
+ private static List parseSeparator(CharSequence s) {
+ List columns = new ArrayList<>();
int pipes = 0;
boolean valid = false;
int i = 0;
+ int width = 0;
while (i < s.length()) {
char c = s.charAt(i);
switch (c) {
@@ -196,10 +219,12 @@ private static List parseSeparator(CharSequence s) {
if (c == ':') {
left = true;
i++;
+ width++;
}
boolean haveDash = false;
while (i < s.length() && s.charAt(i) == '-') {
i++;
+ width++;
haveDash = true;
}
if (!haveDash) {
@@ -209,8 +234,10 @@ private static List parseSeparator(CharSequence s) {
if (i < s.length() && s.charAt(i) == ':') {
right = true;
i++;
+ width++;
}
- columns.add(getAlignment(left, right));
+ columns.add(new TableCellInfo(getAlignment(left, right), width));
+ width = 0;
// Next, need another pipe
pipes = 0;
break;
@@ -247,21 +274,39 @@ public static class Factory extends AbstractBlockParserFactory {
@Override
public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
List paragraphLines = matchedBlockParser.getParagraphLines().getLines();
- if (paragraphLines.size() == 1 && Parsing.find('|', paragraphLines.get(0).getContent(), 0) != -1) {
+ if (paragraphLines.size() >= 1 && Characters.find('|', paragraphLines.get(paragraphLines.size() - 1).getContent(), 0) != -1) {
SourceLine line = state.getLine();
SourceLine separatorLine = line.substring(state.getIndex(), line.getContent().length());
- List columns = parseSeparator(separatorLine.getContent());
+ List columns = parseSeparator(separatorLine.getContent());
if (columns != null && !columns.isEmpty()) {
- SourceLine paragraph = paragraphLines.get(0);
+ SourceLine paragraph = paragraphLines.get(paragraphLines.size() - 1);
List headerCells = split(paragraph);
if (columns.size() >= headerCells.size()) {
return BlockStart.of(new TableBlockParser(columns, paragraph))
.atIndex(state.getIndex())
- .replaceActiveBlockParser();
+ .replaceParagraphLines(1);
}
}
}
return BlockStart.none();
}
}
+
+ private static class TableCellInfo {
+ private final TableCell.Alignment alignment;
+ private final int width;
+
+ public TableCell.Alignment getAlignment() {
+ return alignment;
+ }
+
+ public int getWidth() {
+ return width;
+ }
+
+ public TableCellInfo(TableCell.Alignment alignment, int width) {
+ this.alignment = alignment;
+ this.width = width;
+ }
+ }
}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java
index a1de50aac..966c4c151 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java
@@ -5,7 +5,6 @@
import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.renderer.html.HtmlWriter;
-import java.util.Collections;
import java.util.Map;
public class TableHtmlNodeRenderer extends TableNodeRenderer {
@@ -18,6 +17,7 @@ public TableHtmlNodeRenderer(HtmlNodeRendererContext context) {
this.context = context;
}
+ @Override
protected void renderBlock(TableBlock tableBlock) {
htmlWriter.line();
htmlWriter.tag("table", getAttributes(tableBlock, "table"));
@@ -26,6 +26,7 @@ protected void renderBlock(TableBlock tableBlock) {
htmlWriter.line();
}
+ @Override
protected void renderHead(TableHead tableHead) {
htmlWriter.line();
htmlWriter.tag("thead", getAttributes(tableHead, "thead"));
@@ -34,6 +35,7 @@ protected void renderHead(TableHead tableHead) {
htmlWriter.line();
}
+ @Override
protected void renderBody(TableBody tableBody) {
htmlWriter.line();
htmlWriter.tag("tbody", getAttributes(tableBody, "tbody"));
@@ -42,6 +44,7 @@ protected void renderBody(TableBody tableBody) {
htmlWriter.line();
}
+ @Override
protected void renderRow(TableRow tableRow) {
htmlWriter.line();
htmlWriter.tag("tr", getAttributes(tableRow, "tr"));
@@ -50,6 +53,7 @@ protected void renderRow(TableRow tableRow) {
htmlWriter.line();
}
+ @Override
protected void renderCell(TableCell tableCell) {
String tagName = tableCell.isHeader() ? "th" : "td";
htmlWriter.line();
@@ -60,14 +64,14 @@ protected void renderCell(TableCell tableCell) {
}
private Map getAttributes(Node node, String tagName) {
- return context.extendAttributes(node, tagName, Collections.emptyMap());
+ return context.extendAttributes(node, tagName, Map.of());
}
private Map getCellAttributes(TableCell tableCell, String tagName) {
if (tableCell.getAlignment() != null) {
- return context.extendAttributes(tableCell, tagName, Collections.singletonMap("align", getAlignValue(tableCell.getAlignment())));
+ return context.extendAttributes(tableCell, tagName, Map.of("align", getAlignValue(tableCell.getAlignment())));
} else {
- return context.extendAttributes(tableCell, tagName, Collections.emptyMap());
+ return context.extendAttributes(tableCell, tagName, Map.of());
}
}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java
new file mode 100644
index 000000000..b0705f579
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java
@@ -0,0 +1,88 @@
+package org.commonmark.ext.gfm.tables.internal;
+
+import org.commonmark.ext.gfm.tables.*;
+import org.commonmark.node.Node;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+import org.commonmark.text.AsciiMatcher;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * The Table node renderer that is needed for rendering GFM tables (GitHub Flavored Markdown) to Markdown.
+ */
+public class TableMarkdownNodeRenderer extends TableNodeRenderer {
+ private final MarkdownWriter writer;
+ private final MarkdownNodeRendererContext context;
+
+ private final AsciiMatcher pipe = AsciiMatcher.builder().c('|').build();
+
+ private final List columns = new ArrayList<>();
+
+ public TableMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+ this.writer = context.getWriter();
+ this.context = context;
+ }
+
+ @Override
+ protected void renderBlock(TableBlock node) {
+ columns.clear();
+ writer.pushTight(true);
+ renderChildren(node);
+ writer.popTight();
+ writer.block();
+ }
+
+ @Override
+ protected void renderHead(TableHead node) {
+ renderChildren(node);
+ for (TableCell.Alignment columnAlignment : columns) {
+ writer.raw('|');
+ if (columnAlignment == TableCell.Alignment.LEFT) {
+ writer.raw(":---");
+ } else if (columnAlignment == TableCell.Alignment.RIGHT) {
+ writer.raw("---:");
+ } else if (columnAlignment == TableCell.Alignment.CENTER) {
+ writer.raw(":---:");
+ } else {
+ writer.raw("---");
+ }
+ }
+ writer.raw("|");
+ writer.block();
+ }
+
+ @Override
+ protected void renderBody(TableBody node) {
+ renderChildren(node);
+ }
+
+ @Override
+ protected void renderRow(TableRow node) {
+ renderChildren(node);
+ // Trailing | at the end of the line
+ writer.raw("|");
+ writer.block();
+ }
+
+ @Override
+ protected void renderCell(TableCell node) {
+ if (node.getParent() != null && node.getParent().getParent() instanceof TableHead) {
+ columns.add(node.getAlignment());
+ }
+ writer.raw("|");
+ writer.pushRawEscape(pipe);
+ renderChildren(node);
+ writer.popRawEscape();
+ }
+
+ private void renderChildren(Node parent) {
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext();
+ context.render(node);
+ node = next;
+ }
+ }
+}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java
index 93478a30b..2982e1518 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java
@@ -1,28 +1,22 @@
package org.commonmark.ext.gfm.tables.internal;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.commonmark.ext.gfm.tables.TableBlock;
-import org.commonmark.ext.gfm.tables.TableBody;
-import org.commonmark.ext.gfm.tables.TableCell;
-import org.commonmark.ext.gfm.tables.TableHead;
-import org.commonmark.ext.gfm.tables.TableRow;
+import org.commonmark.ext.gfm.tables.*;
import org.commonmark.node.Node;
import org.commonmark.renderer.NodeRenderer;
+import java.util.Set;
+
abstract class TableNodeRenderer implements NodeRenderer {
@Override
public Set> getNodeTypes() {
- return new HashSet<>(Arrays.asList(
+ return Set.of(
TableBlock.class,
TableHead.class,
TableBody.class,
TableRow.class,
TableCell.class
- ));
+ );
}
@Override
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java
index 94b0e8665..0ba6894b5 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java
@@ -22,49 +22,46 @@ public TableTextContentNodeRenderer(TextContentNodeRendererContext context) {
this.context = context;
}
+ @Override
protected void renderBlock(TableBlock tableBlock) {
+ // Render rows tight
+ textContentWriter.pushTight(true);
renderChildren(tableBlock);
- if (tableBlock.getNext() != null) {
- textContentWriter.write("\n");
- }
+ textContentWriter.popTight();
+ textContentWriter.block();
}
+ @Override
protected void renderHead(TableHead tableHead) {
renderChildren(tableHead);
}
+ @Override
protected void renderBody(TableBody tableBody) {
renderChildren(tableBody);
}
+ @Override
protected void renderRow(TableRow tableRow) {
- textContentWriter.line();
renderChildren(tableRow);
- textContentWriter.line();
+ textContentWriter.block();
}
+ @Override
protected void renderCell(TableCell tableCell) {
renderChildren(tableCell);
- textContentWriter.write('|');
- textContentWriter.whitespace();
- }
-
- private void renderLastCell(TableCell tableCell) {
- renderChildren(tableCell);
+ // For the last cell in row, don't render the delimiter
+ if (tableCell.getNext() != null) {
+ textContentWriter.write('|');
+ textContentWriter.whitespace();
+ }
}
private void renderChildren(Node parent) {
Node node = parent.getFirstChild();
while (node != null) {
Node next = node.getNext();
-
- // For last cell in row, we dont render the delimiter.
- if (node instanceof TableCell && next == null) {
- renderLastCell((TableCell) node);
- } else {
- context.render(node);
- }
-
+ context.render(node);
node = next;
}
}
diff --git a/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java
new file mode 100644
index 000000000..85c11206c
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java
@@ -0,0 +1,75 @@
+package org.commonmark.ext.gfm.tables;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class TableMarkdownRendererTest {
+
+ private static final Set EXTENSIONS = Set.of(TablesExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void testHeadNoBody() {
+ assertRoundTrip("|Abc|\n|---|\n");
+ assertRoundTrip("|Abc|Def|\n|---|---|\n");
+ assertRoundTrip("|Abc||\n|---|---|\n");
+ }
+
+ @Test
+ public void testHeadAndBody() {
+ assertRoundTrip("|Abc|\n|---|\n|1|\n");
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|1|2|\n");
+ }
+
+ @Test
+ public void testBodyHasFewerColumns() {
+ // Could try not to write empty trailing cells but this is fine too
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|1||\n");
+ }
+
+ @Test
+ public void testAlignment() {
+ assertRoundTrip("|Abc|Def|\n|:---|---|\n|1|2|\n");
+ assertRoundTrip("|Abc|Def|\n|---|---:|\n|1|2|\n");
+ assertRoundTrip("|Abc|Def|\n|:---:|:---:|\n|1|2|\n");
+ }
+
+ @Test
+ public void testInsideBlockQuote() {
+ assertRoundTrip("> |Abc|Def|\n> |---|---|\n> |1|2|\n");
+ }
+
+ @Test
+ public void testMultipleTables() {
+ assertRoundTrip("|Abc|Def|\n|---|---|\n\n|One|\n|---|\n|Only|\n");
+ }
+
+ @Test
+ public void testEscaping() {
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|text \\||\n");
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|code `\\|`|\n");
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|Inline HTML|Foo\\|bar|\n");
+ }
+
+ @Test
+ public void testEscaped() {
+ // `|` in Text nodes needs to be escaped, otherwise the generated Markdown would get parsed back as a table
+ assertRoundTrip("\\|Abc\\|\n\\|---\\|\n");
+ }
+
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+
+ private void assertRoundTrip(String input) {
+ String rendered = render(input);
+ assertThat(rendered).isEqualTo(input);
+ }
+}
diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java
index 12c806e32..e7f3db4d1 100644
--- a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java
+++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java
@@ -7,39 +7,27 @@
import org.commonmark.testutil.TestResources;
import org.commonmark.testutil.example.Example;
import org.commonmark.testutil.example.ExampleReader;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.Parameter;
+import org.junit.jupiter.params.ParameterizedClass;
+import org.junit.jupiter.params.provider.MethodSource;
-import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
import java.util.Set;
-@RunWith(Parameterized.class)
+@ParameterizedClass
+@MethodSource("data")
public class TablesSpecTest extends RenderingTestCase {
- private static final Set EXTENSIONS = Collections.singleton(TablesExtension.create());
+ private static final Set EXTENSIONS = Set.of(TablesExtension.create());
private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
- private final Example example;
+ @Parameter
+ Example example;
- public TablesSpecTest(Example example) {
- this.example = example;
- }
-
- @Parameters(name = "{0}")
- public static List
````````````````````````````````
+Issue #196.
+
+```````````````````````````````` example
+a
+?>
+.
+
+````````````````````````````````
diff --git a/commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt b/commonmark-test-util/src/main/resources/gfm-spec.txt
similarity index 99%
rename from commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt
rename to commonmark-test-util/src/main/resources/gfm-spec.txt
index 582131d70..d42f3369e 100644
--- a/commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt
+++ b/commonmark-test-util/src/main/resources/gfm-spec.txt
@@ -130,7 +130,7 @@ questions it does not answer:
not require that. This is hardly a "corner case," and divergences
between implementations on this issue often lead to surprises for
users in real documents. (See [this comment by John
- Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+ Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).)
2. Is a blank line needed before a block quote or heading?
Most implementations do not require the blank line. However,
@@ -138,7 +138,7 @@ questions it does not answer:
also to ambiguities in parsing (note that some implementations
put the heading inside the blockquote, while others do not).
(John Gruber has also spoken [in favor of requiring the blank
- lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+ lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).)
3. Is a blank line needed before an indented code block?
(`Markdown.pl` requires it, but this is not mentioned in the
@@ -171,7 +171,7 @@ questions it does not answer:
```
(There are some relevant comments by John Gruber
- [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+ [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).)
5. Can list markers be indented? Can ordered list markers be right-aligned?
@@ -1001,10 +1001,7 @@ interpretable as a [code fence], [ATX heading][ATX headings],
A [setext heading underline](@) is a sequence of
`=` characters or a sequence of `-` characters, with no more than 3
-spaces indentation and any number of trailing spaces. If a line
-containing a single `-` can be interpreted as an
-empty [list items], it should be interpreted this way
-and not as a [setext heading underline].
+spaces of indentation and any number of trailing spaces or tabs.
The heading is a level 1 heading if `=` characters are used in
the [setext heading underline], and a level 2 heading if `-`
@@ -1638,7 +1635,7 @@ has been found, the code block contains all of the lines after the
opening code fence until the end of the containing block (or
document). (An alternative spec would require backtracking in the
event that a closing code fence is not found. But this makes parsing
-much less efficient, and there seems to be no real down side to the
+much less efficient, and there seems to be no real downside to the
behavior described here.)
A fenced code block may interrupt a paragraph, and does not require
@@ -2068,7 +2065,7 @@ followed by an uppercase ASCII letter.\
``.
-6. **Start condition:** line begins the string `<` or `</`
+6. **Start condition:** line begins with the string `<` or `</`
followed by one of the strings (case-insensitive) `address`,
`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
@@ -2077,7 +2074,7 @@ followed by one of the strings (case-insensitive) `address`,
`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
-`section`, `source`, `summary`, `table`, `tbody`, `td`,
+`section`, `summary`, `table`, `tbody`, `td`,
`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
by [whitespace], the end of the line, the string `>`, or
the string `/>`.\
@@ -5279,7 +5276,7 @@ well. ([reStructuredText](http://docutils.sourceforge.net/rst.html)
takes a different approach, requiring blank lines before lists
even inside other list items.)
-In order to solve of unwanted lists in paragraphs with
+In order to solve the problem of unwanted lists in paragraphs with
hard-wrapped numerals, we allow only lists starting with `1` to
interrupt paragraphs. Thus,
@@ -6929,7 +6926,7 @@ foo__bar__
```````````````````````````````` example
__foo, __bar__, baz__
.
-
````````````````````````````````
@@ -9410,10 +9407,9 @@ character, and a `>` character.
A [closing tag](@) consists of the string `</`, a
[tag name], optional [whitespace], and the character `>`.
-An [HTML comment](@) consists of `<!--` + *text* + `-->`,
-where *text* does not start with `>` or `->`, does not end with `-`,
-and does not contain `--`. (See the
-[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).)
+An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of characters not including the string `-->`, and `-->` (see the
+[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)).
A [processing instruction](@)
consists of the string `<?`, a string
@@ -9554,30 +9550,20 @@ Illegal attributes in closing tag:
Comments:
```````````````````````````````` example
-foo
-.
-
foo
-````````````````````````````````
-
-
-```````````````````````````````` example
-foo
+foo
.
-
````````````````````````````````
@@ -10224,4 +10210,3 @@ closers:
After we're done, we remove all delimiters above `stack_bottom` from the
delimiter stack.
-
diff --git a/commonmark-test-util/src/main/resources/spec.txt b/commonmark-test-util/src/main/resources/spec.txt
index 3913de442..f1fab281e 100644
--- a/commonmark-test-util/src/main/resources/spec.txt
+++ b/commonmark-test-util/src/main/resources/spec.txt
@@ -1,9 +1,9 @@
---
title: CommonMark Spec
author: John MacFarlane
-version: 0.29
-date: '2019-04-06'
-license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
+version: '0.31.2'
+date: '2024-01-28'
+license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)'
...
# Introduction
@@ -14,7 +14,7 @@ Markdown is a plain text format for writing structured documents,
based on conventions for indicating formatting in email
and usenet posts. It was developed by John Gruber (with
help from Aaron Swartz) and released in 2004 in the form of a
-[syntax description](http://daringfireball.net/projects/markdown/syntax)
+[syntax description](https://daringfireball.net/projects/markdown/syntax)
and a Perl script (`Markdown.pl`) for converting Markdown to
HTML. In the next decade, dozens of implementations were
developed in many languages. Some extended the original
@@ -34,10 +34,10 @@ As Gruber writes:
> Markdown-formatted document should be publishable as-is, as
> plain text, without looking like it's been marked up with tags
> or formatting instructions.
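-> (<http://daringfireball.net/projects/markdown/syntax#philosophy>)
+> (<https://daringfireball.net/projects/markdown/syntax#philosophy>)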
The point can be illustrated by comparing a sample of
-[AsciiDoc](http://www.methods.co.nz/asciidoc/) with
+[AsciiDoc](https://asciidoc.org/) with
an equivalent sample of Markdown. Here is a sample of
AsciiDoc from the AsciiDoc manual:
@@ -103,7 +103,7 @@ source, not just in the processed document.
## Why is a spec needed?
John Gruber's [canonical description of Markdown's
-syntax](http://daringfireball.net/projects/markdown/syntax)
+syntax](https://daringfireball.net/projects/markdown/syntax)
does not specify the syntax unambiguously. Here are some examples of
questions it does not answer:
@@ -114,7 +114,7 @@ questions it does not answer:
not require that. This is hardly a "corner case," and divergences
between implementations on this issue often lead to surprises for
users in real documents. (See [this comment by John
- Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+ Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).)
2. Is a blank line needed before a block quote or heading?
Most implementations do not require the blank line. However,
@@ -122,7 +122,7 @@ questions it does not answer:
also to ambiguities in parsing (note that some implementations
put the heading inside the blockquote, while others do not).
(John Gruber has also spoken [in favor of requiring the blank
- lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+ lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).)
3. Is a blank line needed before an indented code block?
(`Markdown.pl` requires it, but this is not mentioned in the
@@ -155,7 +155,7 @@ questions it does not answer:
```
(There are some relevant comments by John Gruber
- [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+ [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).)
5. Can list markers be indented? Can ordered list markers be right-aligned?
@@ -270,6 +270,16 @@ of representing the structural distinctions we need to make, and the
choice of HTML for the tests makes it possible to run the tests against
an implementation without writing an abstract syntax tree renderer.
+Note that not every feature of the HTML samples is mandated by
+the spec. For example, the spec says what counts as a link
+destination, but it doesn't mandate that non-ASCII characters in
+the URL be percent-encoded. To use the automatic tests,
+implementers will need to provide a renderer that conforms to
+the expectations of the spec examples (percent-encoding
+non-ASCII characters in URLs). But a conforming implementation
+can use a different renderer and may choose not to
+percent-encode non-ASCII characters in URLs.
+
This document is generated from a text file, `spec.txt`, written
in Markdown with a small extension for the side-by-side tests.
The script `tools/makespec.py` can be used to convert `spec.txt` into
@@ -294,37 +304,31 @@ of [characters] rather than bytes. A conforming parser may be limited
to a certain encoding.
A [line](@) is a sequence of zero or more [characters]
-other than newline (`U+000A`) or carriage return (`U+000D`),
+other than line feed (`U+000A`) or carriage return (`U+000D`),
followed by a [line ending] or by the end of file.
-A [line ending](@) is a newline (`U+000A`), a carriage return
-(`U+000D`) not followed by a newline, or a carriage return and a
-following newline.
+A [line ending](@) is a line feed (`U+000A`), a carriage return
+(`U+000D`) not followed by a line feed, or a carriage return and a
+following line feed.
A line containing no characters, or a line containing only spaces
(`U+0020`) or tabs (`U+0009`), is called a [blank line](@).
The following definitions of character classes will be used in this spec:
-A [whitespace character](@) is a space
-(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`),
-form feed (`U+000C`), or carriage return (`U+000D`).
-
-[Whitespace](@) is a sequence of one or more [whitespace
-characters].
+A [Unicode whitespace character](@) is a character in the Unicode `Zs` general
+category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or
+carriage return (`U+000D`).
-A [Unicode whitespace character](@) is
-any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
-carriage return (`U+000D`), newline (`U+000A`), or form feed
-(`U+000C`).
+[Unicode whitespace](@) is a sequence of one or more
+[Unicode whitespace characters].
-[Unicode whitespace](@) is a sequence of one
-or more [Unicode whitespace characters].
+A [tab](@) is `U+0009`.
A [space](@) is `U+0020`.
-A [non-whitespace character](@) is any character
-that is not a [whitespace character].
+An [ASCII control character](@) is a character between `U+0000–1F` (both
+inclusive) or `U+007F`.
An [ASCII punctuation character](@)
is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
@@ -333,14 +337,13 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060),
`{`, `|`, `}`, or `~` (U+007B–007E).
-A [punctuation character](@) is an [ASCII
-punctuation character] or anything in
-the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
+A [Unicode punctuation character](@) is a character in the Unicode `P`
+(punctuation) or `S` (symbol) general categories.
## Tabs
Tabs in lines are not expanded to [spaces]. However,
-in contexts where whitespace helps to define block structure,
+in contexts where spaces help to define block structure,
tabs behave as if they were replaced by spaces with a tab stop
of 4 characters.
@@ -478,3267 +481,3620 @@ bar
For security reasons, the Unicode character `U+0000` must be replaced
with the REPLACEMENT CHARACTER (`U+FFFD`).
-# Blocks and inlines
-
-We can think of a document as a sequence of
-[blocks](@)---structural elements like paragraphs, block
-quotations, lists, headings, rules, and code blocks. Some blocks (like
-block quotes and list items) contain other blocks; others (like
-headings and paragraphs) contain [inline](@) content---text,
-links, emphasized text, images, code spans, and so on.
-## Precedence
+## Backslash escapes
-Indicators of block structure always take precedence over indicators
-of inline structure. So, for example, the following is a list with
-two items, not a list with one item containing a code span:
+Any ASCII punctuation character may be backslash-escaped:
```````````````````````````````` example
-- `one
-- two`
+\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
.
-
-
`one
-
two`
-
+
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
````````````````````````````````
-This means that parsing can proceed in two steps: first, the block
-structure of the document can be discerned; second, text lines inside
-paragraphs, headings, and other block constructs can be parsed for inline
-structure. The second step requires information about link reference
-definitions that will be available only at the end of the first
-step. Note that the first step requires processing lines in sequence,
-but the second can be parallelized, since the inline parsing of
-one block element does not affect the inline parsing of any other.
-
-## Container blocks and leaf blocks
-
-We can divide blocks into two types:
-[container blocks](@),
-which can contain other blocks, and [leaf blocks](@),
-which cannot.
-
-# Leaf blocks
+Backslashes before other characters are treated as literal
+backslashes:
-This section describes the different kinds of leaf block that make up a
-Markdown document.
+```````````````````````````````` example
+\→\A\a\ \3\φ\«
+.
+
\→\A\a\ \3\φ\«
+````````````````````````````````
-## Thematic breaks
-A line consisting of 0-3 spaces of indentation, followed by a sequence
-of three or more matching `-`, `_`, or `*` characters, each followed
-optionally by any number of spaces or tabs, forms a
-[thematic break](@).
+Escaped characters are treated as regular characters and do
+not have their usual Markdown meanings:
```````````````````````````````` example
-***
----
-___
+\*not emphasized*
+\ not a tag
+\[not a link](/foo)
+\`not code`
+1\. not a list
+\* not a list
+\# not a heading
+\[foo]: /url "not a reference"
+\ö not a character entity
.
-
-
-
+
*not emphasized*
+<br/> not a tag
+[not a link](/foo)
+`not code`
+1. not a list
+* not a list
+# not a heading
+[foo]: /url "not a reference"
+ö not a character entity
````````````````````````````````
-Wrong characters:
+If a backslash is itself escaped, the following character is not:
```````````````````````````````` example
-+++
+\\*emphasis*
.
-
+++
+
\emphasis
````````````````````````````````
+A backslash at the end of the line is a [hard line break]:
+
```````````````````````````````` example
-===
+foo\
+bar
.
-
===
+
foo
+bar
````````````````````````````````
-Not enough characters:
+Backslash escapes do not work in code blocks, code spans, autolinks, or
+raw HTML:
```````````````````````````````` example
---
-**
-__
+`` \[\` ``
.
-
--
-**
-__
+
\[\`
````````````````````````````````
-One to three spaces indent are allowed:
-
```````````````````````````````` example
- ***
- ***
- ***
+ \[\]
.
-
-
-
+
\[\]
+
````````````````````````````````
-Four spaces is too many:
-
```````````````````````````````` example
- ***
+~~~
+\[\]
+~~~
.
-
***
+
\[\]
````````````````````````````````
```````````````````````````````` example
-Foo
- ***
+
.
-
````````````````````````````````
-It is required that all of the [non-whitespace characters] be the same.
-So, this is not a thematic break:
+[Decimal numeric character
+references](@)
+consist of `&#` + a string of 1--7 arabic digits + `;`. A
+numeric character reference is parsed as the corresponding
+Unicode character. Invalid Unicode code points will be replaced by
+the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons,
+the code point `U+0000` will also be replaced by `U+FFFD`.
```````````````````````````````` example
- *-*
+&#35; &#1234; &#992; &#0;
.
-
-
+
# Ӓ Ϡ �
````````````````````````````````
-Thematic breaks do not need blank lines before or after:
+[Hexadecimal numeric character
+references](@) consist of `&#` +
+either `X` or `x` + a string of 1-6 hexadecimal digits + `;`.
+They too are parsed as the corresponding Unicode character (this
+time specified with a hexadecimal numeral instead of decimal).
```````````````````````````````` example
-- foo
-***
-- bar
+&#X22; &#XD06; &#xcab;
.
-
-
foo
-
-
-
-
bar
-
+
" ആ ಫ
````````````````````````````````
-Thematic breaks can interrupt a paragraph:
+Here are some nonentities:
```````````````````````````````` example
-Foo
-***
-bar
+  &x;
+
+abcdef0;
+&ThisIsNotDefined; &hi?;
.
-
````````````````````````````````
-When both a thematic break and a list item are possible
-interpretations of a line, the thematic break takes precedence:
+Strings that are not on the list of HTML5 named entities are not
+recognized as entity references either:
```````````````````````````````` example
-* Foo
-* * *
-* Bar
+&MadeUpEntity;
.
-
-
Foo
-
-
-
-
Bar
-
+
&MadeUpEntity;
````````````````````````````````
-If you want a thematic break in a list item, use a different bullet:
+Entity and numeric character references are recognized in any
+context besides code spans or code blocks, including
+URLs, [link titles], and [fenced code block][] [info strings]:
```````````````````````````````` example
-- Foo
-- * * *
+
.
-
````````````````````````````````
-More than six `#` characters is not a heading:
-
```````````````````````````````` example
-####### foo
+``` föö
+foo
+```
.
-
####### foo
+
foo
+
````````````````````````````````
-At least one space is required between the `#` characters and the
-heading's contents, unless the heading is empty. Note that many
-implementations currently do not require the space. However, the
-space was required by the
-[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
-and it helps prevent things like the following from being parsed as
-headings:
+Entity and numeric character references are treated as literal
+text in code spans and code blocks:
```````````````````````````````` example
-#5 bolt
-
-#hashtag
+`föö`
.
-
#5 bolt
-
#hashtag
+
föö
````````````````````````````````
-This is not a heading, because the first `#` is escaped:
-
```````````````````````````````` example
-\## foo
+ föfö
.
-
## foo
+
föfö
+
````````````````````````````````
-Contents are parsed as inlines:
+Entity and numeric character references cannot be used
+in place of symbols indicating structure in CommonMark
+documents.
```````````````````````````````` example
-# foo *bar* \*baz\*
+*foo*
+*foo*
.
-
foo bar *baz*
+
*foo*
+foo
````````````````````````````````
+```````````````````````````````` example
+* foo
-Leading and trailing [whitespace] is ignored in parsing inline content:
+* foo
+.
+
* foo
+
+
foo
+
+````````````````````````````````
```````````````````````````````` example
-# foo
+foo
bar
.
-
foo
+
foo
+
+bar
````````````````````````````````
+```````````````````````````````` example
+ foo
+.
+
→foo
+````````````````````````````````
-One to three spaces indentation are allowed:
```````````````````````````````` example
- ### foo
- ## foo
- # foo
+[a](url "tit")
.
-
foo
-
foo
-
foo
+
[a](url "tit")
````````````````````````````````
-Four spaces are too much:
+
+# Blocks and inlines
+
+We can think of a document as a sequence of
+[blocks](@)---structural elements like paragraphs, block
+quotations, lists, headings, rules, and code blocks. Some blocks (like
+block quotes and list items) contain other blocks; others (like
+headings and paragraphs) contain [inline](@) content---text,
+links, emphasized text, images, code spans, and so on.
+
+## Precedence
+
+Indicators of block structure always take precedence over indicators
+of inline structure. So, for example, the following is a list with
+two items, not a list with one item containing a code span:
```````````````````````````````` example
- # foo
+- `one
+- two`
.
-
# foo
-
+
+
`one
+
two`
+
````````````````````````````````
+This means that parsing can proceed in two steps: first, the block
+structure of the document can be discerned; second, text lines inside
+paragraphs, headings, and other block constructs can be parsed for inline
+structure. The second step requires information about link reference
+definitions that will be available only at the end of the first
+step. Note that the first step requires processing lines in sequence,
+but the second can be parallelized, since the inline parsing of
+one block element does not affect the inline parsing of any other.
+
+## Container blocks and leaf blocks
+
+We can divide blocks into two types:
+[container blocks](#container-blocks),
+which can contain other blocks, and [leaf blocks](#leaf-blocks),
+which cannot.
+
+# Leaf blocks
+
+This section describes the different kinds of leaf block that make up a
+Markdown document.
+
+## Thematic breaks
+
+A line consisting of optionally up to three spaces of indentation, followed by a
+sequence of three or more matching `-`, `_`, or `*` characters, each followed
+optionally by any number of spaces or tabs, forms a
+[thematic break](@).
+
```````````````````````````````` example
-foo
- # bar
+***
+---
+___
.
-
foo
-# bar
+
+
+
````````````````````````````````
-A closing sequence of `#` characters is optional:
+Wrong characters:
```````````````````````````````` example
-## foo ##
- ### bar ###
++++
.
-
foo
-
bar
+
+++
````````````````````````````````
-It need not be the same length as the opening sequence:
-
```````````````````````````````` example
-# foo ##################################
-##### foo ##
+===
.
-
foo
-
foo
+
===
````````````````````````````````
-Spaces are allowed after the closing sequence:
+Not enough characters:
```````````````````````````````` example
-### foo ###
+--
+**
+__
.
-
foo
+
--
+**
+__
````````````````````````````````
-A sequence of `#` characters with anything but [spaces] following it
-is not a closing sequence, but counts as part of the contents of the
-heading:
+Up to three spaces of indentation are allowed:
```````````````````````````````` example
-### foo ### b
+ ***
+ ***
+ ***
.
-
foo ### b
+
+
+
````````````````````````````````
-The closing sequence must be preceded by a space:
+Four spaces of indentation is too many:
```````````````````````````````` example
-# foo#
+ ***
.
-
foo#
+
***
+
````````````````````````````````
-Backslash-escaped `#` characters do not count as part
-of the closing sequence:
-
```````````````````````````````` example
-### foo \###
-## foo #\##
-# foo \#
+Foo
+ ***
.
-
foo ###
-
foo ###
-
foo #
+
Foo
+***
````````````````````````````````
-ATX headings need not be separated from surrounding content by blank
-lines, and they can interrupt paragraphs:
+More than three characters may be used:
```````````````````````````````` example
-****
-## foo
-****
+_____________________________________
.
-
foo
-
````````````````````````````````
+Spaces and tabs are allowed between the characters:
+
```````````````````````````````` example
-Foo bar
-# baz
-Bar foo
+ - - -
.
-
Foo bar
-
baz
-
Bar foo
+
````````````````````````````````
-ATX headings can be empty:
-
```````````````````````````````` example
-##
-#
-### ###
+ ** * ** * ** * **
.
-
-
-
+
````````````````````````````````
-## Setext headings
+```````````````````````````````` example
+- - - -
+.
+
+````````````````````````````````
-A [setext heading](@) consists of one or more
-lines of text, each containing at least one [non-whitespace
-character], with no more than 3 spaces indentation, followed by
-a [setext heading underline]. The lines of text must be such
-that, were they not followed by the setext heading underline,
-they would be interpreted as a paragraph: they cannot be
-interpretable as a [code fence], [ATX heading][ATX headings],
-[block quote][block quotes], [thematic break][thematic breaks],
-[list item][list items], or [HTML block][HTML blocks].
-A [setext heading underline](@) is a sequence of
-`=` characters or a sequence of `-` characters, with no more than 3
-spaces indentation and any number of trailing spaces. If a line
-containing a single `-` can be interpreted as an
-empty [list items], it should be interpreted this way
-and not as a [setext heading underline].
+Spaces and tabs are allowed at the end:
-The heading is a level 1 heading if `=` characters are used in
-the [setext heading underline], and a level 2 heading if `-`
-characters are used. The contents of the heading are the result
-of parsing the preceding lines of text as CommonMark inline
-content.
+```````````````````````````````` example
+- - - -
+.
+
+````````````````````````````````
-In general, a setext heading need not be preceded or followed by a
-blank line. However, it cannot interrupt a paragraph, so when a
-setext heading comes after a paragraph, a blank line is needed between
-them.
-Simple examples:
+However, no other characters may occur in the line:
```````````````````````````````` example
-Foo *bar*
-=========
+_ _ _ _ a
-Foo *bar*
----------
+a------
+
+---a---
.
-
Foo bar
-
Foo bar
+
_ _ _ _ a
+
a------
+
---a---
````````````````````````````````
-The content of the header may span more than one line:
+It is required that all of the characters other than spaces or tabs be the same.
+So, this is not a thematic break:
```````````````````````````````` example
-Foo *bar
-baz*
-====
+ *-*
.
-
Foo bar
-baz
+
-
````````````````````````````````
-The contents are the result of parsing the headings's raw
-content as inlines. The heading's raw content is formed by
-concatenating the lines and removing initial and final
-[whitespace].
+
+Thematic breaks do not need blank lines before or after:
```````````````````````````````` example
- Foo *bar
-baz*→
-====
+- foo
+***
+- bar
.
-
Foo bar
-baz
+
+
foo
+
+
+
+
bar
+
````````````````````````````````
-The underlining can be any length:
+Thematic breaks can interrupt a paragraph:
```````````````````````````````` example
Foo
--------------------------
-
-Foo
-=
+***
+bar
.
-
Foo
-
Foo
+
Foo
+
+
bar
````````````````````````````````
-The heading content can be indented up to three spaces, and need
-not line up with the underlining:
+If a line of dashes that meets the above conditions for being a
+thematic break could also be interpreted as the underline of a [setext
+heading], the interpretation as a
+[setext heading] takes precedence. Thus, for example,
+this is a setext heading, not a paragraph followed by a thematic break:
```````````````````````````````` example
- Foo
+Foo
---
-
- Foo
------
-
- Foo
- ===
+bar
.
Foo
-
Foo
-
Foo
+
bar
````````````````````````````````
-Four spaces indent is too much:
+When both a thematic break and a list item are possible
+interpretations of a line, the thematic break takes precedence:
```````````````````````````````` example
- Foo
- ---
-
- Foo
----
+* Foo
+* * *
+* Bar
.
-
Foo
----
+
+
Foo
+
+
+
+
Bar
+
+````````````````````````````````
-Foo
-
+
+If you want a thematic break in a list item, use a different bullet:
+
+```````````````````````````````` example
+- Foo
+- * * *
+.
+
+
Foo
+
+
+
````````````````````````````````
-The setext heading underline can be indented up to three spaces, and
-may have trailing spaces:
+## ATX headings
+
+An [ATX heading](@)
+consists of a string of characters, parsed as inline content, between an
+opening sequence of 1--6 unescaped `#` characters and an optional
+closing sequence of any number of unescaped `#` characters.
+The opening sequence of `#` characters must be followed by spaces or tabs, or
+by the end of line. The optional closing sequence of `#`s must be preceded by
+spaces or tabs and may be followed by spaces or tabs only. The opening
+`#` character may be preceded by up to three spaces of indentation. The raw
+contents of the heading are stripped of leading and trailing spaces or tabs
+before being parsed as inline content. The heading level is equal to the number
+of `#` characters in the opening sequence.
+
+Simple headings:
```````````````````````````````` example
-Foo
- ----
+# foo
+## foo
+### foo
+#### foo
+##### foo
+###### foo
.
-
Foo
+
foo
+
foo
+
foo
+
foo
+
foo
+
foo
````````````````````````````````
-Four spaces is too much:
+More than six `#` characters is not a heading:
```````````````````````````````` example
-Foo
- ---
+####### foo
.
-
Foo
----
+
####### foo
````````````````````````````````
-The setext heading underline cannot contain internal spaces:
+At least one space or tab is required between the `#` characters and the
+heading's contents, unless the heading is empty. Note that many
+implementations currently do not require the space. However, the
+space was required by the
+[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
+and it helps prevent things like the following from being parsed as
+headings:
```````````````````````````````` example
-Foo
-= =
+#5 bolt
-Foo
---- -
+#hashtag
.
-
Foo
-= =
-
Foo
-
+
#5 bolt
+
#hashtag
````````````````````````````````
-Trailing spaces in the content line do not cause a line break:
+This is not a heading, because the first `#` is escaped:
```````````````````````````````` example
-Foo
------
+\## foo
.
-
Foo
+
## foo
````````````````````````````````
-Nor does a backslash at the end:
+Contents are parsed as inlines:
```````````````````````````````` example
-Foo\
-----
+# foo *bar* \*baz\*
.
-
Foo\
+
foo bar *baz*
````````````````````````````````
-Since indicators of block structure take precedence over
-indicators of inline structure, the following are setext headings:
+Leading and trailing spaces or tabs are ignored in parsing inline content:
```````````````````````````````` example
-`Foo
-----
-`
-
-
+# foo
.
-
`Foo
-
`
-
<a title="a lot
-
of dashes"/>
+
foo
````````````````````````````````
-The setext heading underline cannot be a [lazy continuation
-line] in a list item or block quote:
+Up to three spaces of indentation are allowed:
```````````````````````````````` example
-> Foo
----
+ ### foo
+ ## foo
+ # foo
.
-
-
Foo
-
-
+
foo
+
foo
+
foo
````````````````````````````````
+Four spaces of indentation is too many:
+
```````````````````````````````` example
-> foo
-bar
-===
+ # foo
.
-
-
foo
-bar
-===
-
+
# foo
+
````````````````````````````````
```````````````````````````````` example
-- Foo
----
+foo
+ # bar
.
-
-
Foo
-
-
+
foo
+# bar
````````````````````````````````
-A blank line is needed between a paragraph and a following
-setext heading, since otherwise the paragraph becomes part
-of the heading's content:
+A closing sequence of `#` characters is optional:
```````````````````````````````` example
-Foo
-Bar
----
+## foo ##
+ ### bar ###
.
-
Foo
-Bar
+
foo
+
bar
````````````````````````````````
-But in general a blank line is not required before or after
-setext headings:
+It need not be the same length as the opening sequence:
```````````````````````````````` example
----
-Foo
----
-Bar
----
-Baz
+# foo ##################################
+##### foo ##
.
-
-
Foo
-
Bar
-
Baz
+
foo
+
foo
````````````````````````````````
-Setext headings cannot be empty:
+Spaces or tabs are allowed after the closing sequence:
```````````````````````````````` example
+### foo ###
+.
+
foo
+````````````````````````````````
-====
+
+A sequence of `#` characters with anything but spaces or tabs following it
+is not a closing sequence, but counts as part of the contents of the
+heading:
+
+```````````````````````````````` example
+### foo ### b
.
-
====
+
foo ### b
````````````````````````````````
-Setext heading text lines must not be interpretable as block
-constructs other than paragraphs. So, the line of dashes
-in these examples gets interpreted as a thematic break:
+The closing sequence must be preceded by a space or tab:
```````````````````````````````` example
----
----
+# foo#
.
-
-
+
foo#
````````````````````````````````
+Backslash-escaped `#` characters do not count as part
+of the closing sequence:
+
```````````````````````````````` example
-- foo
------
+### foo \###
+## foo #\##
+# foo \#
.
-
-
foo
-
-
+
foo ###
+
foo ###
+
foo #
````````````````````````````````
+ATX headings need not be separated from surrounding content by blank
+lines, and they can interrupt paragraphs:
+
```````````````````````````````` example
- foo
----
+****
+## foo
+****
.
-
foo
-
+
+
foo
````````````````````````````````
```````````````````````````````` example
-> foo
------
+Foo bar
+# baz
+Bar foo
.
-
-
foo
-
-
+
Foo bar
+
baz
+
Bar foo
````````````````````````````````
-If you want a heading with `> foo` as its literal text, you can
-use backslash escapes:
+ATX headings can be empty:
```````````````````````````````` example
-\> foo
-------
+##
+#
+### ###
.
-
> foo
+
+
+
````````````````````````````````
-**Compatibility note:** Most existing Markdown implementations
-do not allow the text of setext headings to span multiple lines.
-But there is no consensus about how to interpret
+## Setext headings
-``` markdown
-Foo
-bar
----
-baz
-```
+A [setext heading](@) consists of one or more
+lines of text, not interrupted by a blank line, of which the first line does not
+have more than 3 spaces of indentation, followed by
+a [setext heading underline]. The lines of text must be such
+that, were they not followed by the setext heading underline,
+they would be interpreted as a paragraph: they cannot be
+interpretable as a [code fence], [ATX heading][ATX headings],
+[block quote][block quotes], [thematic break][thematic breaks],
+[list item][list items], or [HTML block][HTML blocks].
-One can find four different interpretations:
+A [setext heading underline](@) is a sequence of
+`=` characters or a sequence of `-` characters, with no more than 3
+spaces of indentation and any number of trailing spaces or tabs.
-1. paragraph "Foo", heading "bar", paragraph "baz"
-2. paragraph "Foo bar", thematic break, paragraph "baz"
-3. paragraph "Foo bar --- baz"
-4. heading "Foo bar", paragraph "baz"
+The heading is a level 1 heading if `=` characters are used in
+the [setext heading underline], and a level 2 heading if `-`
+characters are used. The contents of the heading are the result
+of parsing the preceding lines of text as CommonMark inline
+content.
-We find interpretation 4 most natural, and interpretation 4
-increases the expressive power of CommonMark, by allowing
-multiline headings. Authors who want interpretation 1 can
-put a blank line after the first paragraph:
+In general, a setext heading need not be preceded or followed by a
+blank line. However, it cannot interrupt a paragraph, so when a
+setext heading comes after a paragraph, a blank line is needed between
+them.
+
+Simple examples:
```````````````````````````````` example
-Foo
+Foo *bar*
+=========
-bar
----
-baz
+Foo *bar*
+---------
.
-
Foo
-
bar
-
baz
+
Foo bar
+
Foo bar
````````````````````````````````
-Authors who want interpretation 2 can put blank lines around
-the thematic break,
+The content of the heading may span more than one line:
```````````````````````````````` example
-Foo
-bar
+Foo *bar
+baz*
+====
+.
+
Foo bar
+baz
+````````````````````````````````
----
+The contents are the result of parsing the heading's raw
+content as inlines. The heading's raw content is formed by
+concatenating the lines and removing initial and final
+spaces or tabs.
-baz
+```````````````````````````````` example
+ Foo *bar
+baz*→
+====
.
-
Foo
-bar
-
-
baz
+
Foo bar
+baz
````````````````````````````````
-or use a thematic break that cannot count as a [setext heading
-underline], such as
+The underlining can be any length:
```````````````````````````````` example
Foo
-bar
-* * *
-baz
+-------------------------
+
+Foo
+=
.
-
Foo
-bar
-
-
baz
+
Foo
+
Foo
````````````````````````````````
-Authors who want interpretation 3 can use backslash escapes:
+The heading content can be preceded by up to three spaces of indentation, and
+need not line up with the underlining:
```````````````````````````````` example
-Foo
-bar
-\---
-baz
-.
-
+````````````````````````````````
-An [indented code block](@) is composed of one or more
-[indented chunks] separated by blank lines.
-An [indented chunk](@) is a sequence of non-blank lines,
-each indented four or more spaces. The contents of the code block are
-the literal contents of the lines, including trailing
-[line endings], minus four spaces of indentation.
-An indented code block has no [info string].
-An indented code block cannot interrupt a paragraph, so there must be
-a blank line between a paragraph and a following indented code block.
-(A blank line is not needed, however, between a code block and a following
-paragraph.)
+Four spaces of indentation is too many:
```````````````````````````````` example
- a simple
- indented code block
+ Foo
+ ---
+
+ Foo
+---
.
-
a simple
- indented code block
+
Foo
+---
+
+Foo
+
````````````````````````````````
-If there is any ambiguity between an interpretation of indentation
-as a code block and as indicating that material belongs to a [list
-item][list items], the list item interpretation takes precedence:
+The setext heading underline can be preceded by up to three spaces of
+indentation, and may have trailing spaces or tabs:
```````````````````````````````` example
- - foo
-
- bar
+Foo
+ ----
.
-
-
-
foo
-
bar
-
-
+
Foo
````````````````````````````````
-```````````````````````````````` example
-1. foo
-
- - bar
-.
-
-
-
foo
-
-
bar
-
-
-
-````````````````````````````````
-
-
-
-The contents of a code block are literal text, and do not get parsed
-as Markdown:
+Four spaces of indentation is too many:
```````````````````````````````` example
-
- *hi*
-
- - one
+Foo
+ ---
.
-
<a/>
-*hi*
-
-- one
-
+
Foo
+---
````````````````````````````````
-Here we have three chunks separated by blank lines:
+The setext heading underline cannot contain internal spaces or tabs:
```````````````````````````````` example
- chunk1
+Foo
+= =
- chunk2
-
-
-
- chunk3
+Foo
+--- -
.
-
chunk1
-
-chunk2
-
-
-
-chunk3
-
+
Foo
+= =
+
Foo
+
````````````````````````````````
-Any initial spaces beyond four will be included in the content, even
-in interior blank lines:
+Trailing spaces or tabs in the content line do not cause a hard line break:
```````````````````````````````` example
- chunk1
-
- chunk2
+Foo
+-----
.
-
chunk1
-
- chunk2
-
+
Foo
````````````````````````````````
-An indented code block cannot interrupt a paragraph. (This
-allows hanging indents and the like.)
+Nor does a backslash at the end:
```````````````````````````````` example
-Foo
- bar
-
+Foo\
+----
.
-
Foo
-bar
+
Foo\
````````````````````````````````
-However, any non-blank line with fewer than four leading spaces ends
-the code block immediately. So a paragraph may occur immediately
-after indented code:
+Since indicators of block structure take precedence over
+indicators of inline structure, the following are setext headings:
```````````````````````````````` example
- foo
-bar
+`Foo
+----
+`
+
+
.
-
foo
-
-
bar
+
`Foo
+
`
+
<a title="a lot
+
of dashes"/>
````````````````````````````````
-And indented code can occur immediately before and after other kinds of
-blocks:
+The setext heading underline cannot be a [lazy continuation
+line] in a list item or block quote:
```````````````````````````````` example
-# Heading
- foo
-Heading
-------
- foo
-----
+> Foo
+---
.
-
Heading
-
foo
-
-
Heading
-
foo
-
+
+
Foo
+
````````````````````````````````
-The first line can be indented more than four spaces:
-
```````````````````````````````` example
- foo
- bar
+> foo
+bar
+===
.
-
foo
+
+
foo
bar
-
+===
+
````````````````````````````````
-Blank lines preceding or following an indented code block
-are not included in it:
-
```````````````````````````````` example
-
-
- foo
-
-
+- Foo
+---
.
-
foo
-
+
+
Foo
+
+
````````````````````````````````
-Trailing spaces are included in the code block's content:
+A blank line is needed between a paragraph and a following
+setext heading, since otherwise the paragraph becomes part
+of the heading's content:
```````````````````````````````` example
- foo
+Foo
+Bar
+---
.
-
foo
-
+
Foo
+Bar
````````````````````````````````
+But in general a blank line is not required before or after
+setext headings:
-## Fenced code blocks
-
-A [code fence](@) is a sequence
-of at least three consecutive backtick characters (`` ` ``) or
-tildes (`~`). (Tildes and backticks cannot be mixed.)
-A [fenced code block](@)
-begins with a code fence, indented no more than three spaces.
-
-The line with the opening code fence may optionally contain some text
-following the code fence; this is trimmed of leading and trailing
-whitespace and called the [info string](@). If the [info string] comes
-after a backtick fence, it may not contain any backtick
-characters. (The reason for this restriction is that otherwise
-some inline code would be incorrectly interpreted as the
-beginning of a fenced code block.)
-
-The content of the code block consists of all subsequent lines, until
-a closing [code fence] of the same type as the code block
-began with (backticks or tildes), and with at least as many backticks
-or tildes as the opening code fence. If the leading code fence is
-indented N spaces, then up to N spaces of indentation are removed from
-each line of the content (if present). (If a content line is not
-indented, it is preserved unchanged. If it is indented less than N
-spaces, all of the indentation is removed.)
-
-The closing code fence may be indented up to three spaces, and may be
-followed only by spaces, which are ignored. If the end of the
-containing block (or document) is reached and no closing code fence
-has been found, the code block contains all of the lines after the
-opening code fence until the end of the containing block (or
-document). (An alternative spec would require backtracking in the
-event that a closing code fence is not found. But this makes parsing
-much less efficient, and there seems to be no real down side to the
-behavior described here.)
-
-A fenced code block may interrupt a paragraph, and does not require
-a blank line either before or after.
+```````````````````````````````` example
+---
+Foo
+---
+Bar
+---
+Baz
+.
+
+
Foo
+
Bar
+
Baz
+````````````````````````````````
-The content of a code fence is treated as literal text, not parsed
-as inlines. The first word of the [info string] is typically used to
-specify the language of the code sample, and rendered in the `class`
-attribute of the `code` tag. However, this spec does not mandate any
-particular treatment of the [info string].
-Here is a simple example with backticks:
+Setext headings cannot be empty:
```````````````````````````````` example
-```
-<
- >
-```
+
+====
.
-
<
- >
-
+
====
````````````````````````````````
-With tildes:
+Setext heading text lines must not be interpretable as block
+constructs other than paragraphs. So, the line of dashes
+in these examples gets interpreted as a thematic break:
```````````````````````````````` example
-~~~
-<
- >
-~~~
+---
+---
.
-
<
- >
-
+
+
````````````````````````````````
-Fewer than three backticks is not enough:
```````````````````````````````` example
-``
-foo
-``
+- foo
+-----
.
-
foo
+
+
foo
+
+
````````````````````````````````
-The closing code fence must use the same character as the opening
-fence:
```````````````````````````````` example
-```
-aaa
-~~~
-```
+ foo
+---
.
-
+
````````````````````````````````
-The closing code fence must be at least as long as the opening fence:
+If you want a heading with `> foo` as its literal text, you can
+use backslash escapes:
```````````````````````````````` example
-````
-aaa
-```
-``````
+\> foo
+------
.
-
aaa
-```
-
+
> foo
````````````````````````````````
-```````````````````````````````` example
-~~~~
-aaa
-~~~
-~~~~
-.
-
aaa
-~~~
-
-````````````````````````````````
+**Compatibility note:** Most existing Markdown implementations
+do not allow the text of setext headings to span multiple lines.
+But there is no consensus about how to interpret
+``` markdown
+Foo
+bar
+---
+baz
+```
-Unclosed code blocks are closed by the end of the document
-(or the enclosing [block quote][block quotes] or [list item][list items]):
+One can find four different interpretations:
-```````````````````````````````` example
-```
-.
-
-````````````````````````````````
+1. paragraph "Foo", heading "bar", paragraph "baz"
+2. paragraph "Foo bar", thematic break, paragraph "baz"
+3. paragraph "Foo bar --- baz"
+4. heading "Foo bar", paragraph "baz"
+We find interpretation 4 most natural, and interpretation 4
+increases the expressive power of CommonMark, by allowing
+multiline headings. Authors who want interpretation 1 can
+put a blank line after the first paragraph:
```````````````````````````````` example
-`````
+Foo
-```
-aaa
+bar
+---
+baz
.
-
-```
-aaa
-
+
Foo
+
bar
+
baz
````````````````````````````````
+Authors who want interpretation 2 can put blank lines around
+the thematic break,
+
```````````````````````````````` example
-> ```
-> aaa
+Foo
+bar
-bbb
+---
+
+baz
.
-
-
aaa
-
-
-
bbb
+
Foo
+bar
+
+
baz
````````````````````````````````
-A code block can have all empty lines as its content:
+or use a thematic break that cannot count as a [setext heading
+underline], such as
```````````````````````````````` example
-```
-
-
-```
+Foo
+bar
+* * *
+baz
.
-
-
-
+
Foo
+bar
+
+
baz
````````````````````````````````
-A code block can be empty:
+Authors who want interpretation 3 can use backslash escapes:
```````````````````````````````` example
-```
-```
+Foo
+bar
+\---
+baz
.
-
+
Foo
+bar
+---
+baz
````````````````````````````````
-Fences can be indented. If the opening fence is indented,
-content lines will have equivalent opening indentation removed,
-if present:
+## Indented code blocks
+
+An [indented code block](@) is composed of one or more
+[indented chunks] separated by blank lines.
+An [indented chunk](@) is a sequence of non-blank lines,
+each preceded by four or more spaces of indentation. The contents of the code
+block are the literal contents of the lines, including trailing
+[line endings], minus four spaces of indentation.
+An indented code block has no [info string].
+
+An indented code block cannot interrupt a paragraph, so there must be
+a blank line between a paragraph and a following indented code block.
+(A blank line is not needed, however, between a code block and a following
+paragraph.)
```````````````````````````````` example
- ```
- aaa
-aaa
-```
+ a simple
+ indented code block
.
-
aaa
-aaa
+
a simple
+ indented code block
````````````````````````````````
+If there is any ambiguity between an interpretation of indentation
+as a code block and as indicating that material belongs to a [list
+item][list items], the list item interpretation takes precedence:
+
```````````````````````````````` example
- ```
-aaa
- aaa
-aaa
- ```
+ - foo
+
+ bar
.
-
+
````````````````````````````````
-Four spaces indentation produces an indented code block:
+
+The contents of a code block are literal text, and do not get parsed
+as Markdown:
```````````````````````````````` example
- ```
- aaa
- ```
+
+ *hi*
+
+ - one
.
-
```
-aaa
-```
+
<a/>
+*hi*
+
+- one
````````````````````````````````
-Closing fences may be indented by 0-3 spaces, and their indentation
-need not match that of the opening fence:
+Here we have three chunks separated by blank lines:
```````````````````````````````` example
-```
-aaa
- ```
+ chunk1
+
+ chunk2
+
+
+
+ chunk3
.
-
````````````````````````````````
-This is not a closing fence, because it is indented 4 spaces:
+Any initial spaces or tabs beyond four spaces of indentation will be included in
+the content, even in interior blank lines:
```````````````````````````````` example
-```
-aaa
- ```
+ chunk1
+
+ chunk2
.
-
aaa
- ```
+
chunk1
+
+ chunk2
````````````````````````````````
-
-Code fences (opening and closing) cannot contain internal spaces:
+An indented code block cannot interrupt a paragraph. (This
+allows hanging indents and the like.)
```````````````````````````````` example
-``` ```
-aaa
+Foo
+ bar
+
.
-
-aaa
+
Foo
+bar
````````````````````````````````
+However, any non-blank line with fewer than four spaces of indentation ends
+the code block immediately. So a paragraph may occur immediately
+after indented code:
+
```````````````````````````````` example
-~~~~~~
-aaa
-~~~ ~~
+ foo
+bar
.
-
aaa
-~~~ ~~
+
foo
+
bar
````````````````````````````````
-Fenced code blocks can interrupt paragraphs, and can be followed
-directly by paragraphs, without a blank line between:
+And indented code can occur immediately before and after other kinds of
+blocks:
```````````````````````````````` example
-foo
-```
-bar
-```
-baz
+# Heading
+ foo
+Heading
+------
+ foo
+----
.
-
foo
-
bar
+
Heading
+
foo
-
baz
+
Heading
+
foo
+
+
````````````````````````````````
-Other blocks can also occur before and after fenced code blocks
-without an intervening blank line:
+The first line can be preceded by more than four spaces of indentation:
```````````````````````````````` example
-foo
----
-~~~
-bar
-~~~
-# baz
+ foo
+ bar
.
-
foo
-
bar
+
foo
+bar
-
baz
````````````````````````````````
-An [info string] can be provided after the opening code fence.
-Although this spec doesn't mandate any particular treatment of
-the info string, the first word is typically used to specify
-the language of the code block. In HTML output, the language is
-normally indicated by adding a class to the `code` element consisting
-of `language-` followed by the language name.
+Blank lines preceding or following an indented code block
+are not included in it:
```````````````````````````````` example
-```ruby
-def foo(x)
- return 3
-end
-```
+
+
+ foo
+
+
.
-
def foo(x)
- return 3
-end
+
foo
````````````````````````````````
+Trailing spaces or tabs are included in the code block's content:
+
```````````````````````````````` example
-~~~~ ruby startline=3 $%@#$
-def foo(x)
- return 3
-end
-~~~~~~~
+ foo
.
-
def foo(x)
- return 3
-end
+
foo
````````````````````````````````
-```````````````````````````````` example
-````;
-````
-.
-
-````````````````````````````````
+## Fenced code blocks
-[Info strings] for backtick code blocks cannot contain backticks:
+A [code fence](@) is a sequence
+of at least three consecutive backtick characters (`` ` ``) or
+tildes (`~`). (Tildes and backticks cannot be mixed.)
+A [fenced code block](@)
+begins with a code fence, preceded by up to three spaces of indentation.
+
+The line with the opening code fence may optionally contain some text
+following the code fence; this is trimmed of leading and trailing
+spaces or tabs and called the [info string](@). If the [info string] comes
+after a backtick fence, it may not contain any backtick
+characters. (The reason for this restriction is that otherwise
+some inline code would be incorrectly interpreted as the
+beginning of a fenced code block.)
+
+The content of the code block consists of all subsequent lines, until
+a closing [code fence] of the same type as the code block
+began with (backticks or tildes), and with at least as many backticks
+or tildes as the opening code fence. If the leading code fence is
+preceded by N spaces of indentation, then up to N spaces of indentation are
+removed from each line of the content (if present). (If a content line is not
+indented, it is preserved unchanged. If it is indented N spaces or less, all
+of the indentation is removed.)
+
+The closing code fence may be preceded by up to three spaces of indentation, and
+may be followed only by spaces or tabs, which are ignored. If the end of the
+containing block (or document) is reached and no closing code fence
+has been found, the code block contains all of the lines after the
+opening code fence until the end of the containing block (or
+document). (An alternative spec would require backtracking in the
+event that a closing code fence is not found. But this makes parsing
+much less efficient, and there seems to be no real downside to the
+behavior described here.)
+
+A fenced code block may interrupt a paragraph, and does not require
+a blank line either before or after.
+
+The content of a code fence is treated as literal text, not parsed
+as inlines. The first word of the [info string] is typically used to
+specify the language of the code sample, and rendered in the `class`
+attribute of the `code` tag. However, this spec does not mandate any
+particular treatment of the [info string].
+
+Here is a simple example with backticks:
```````````````````````````````` example
-``` aa ```
-foo
+```
+<
+ >
+```
.
-
aa
-foo
+
<
+ >
+
````````````````````````````````
-[Info strings] for tilde code blocks can contain backticks and tildes:
+With tildes:
```````````````````````````````` example
-~~~ aa ``` ~~~
-foo
+~~~
+<
+ >
~~~
.
-
foo
+
<
+ >
````````````````````````````````
+Fewer than three backticks is not enough:
+
+```````````````````````````````` example
+``
+foo
+``
+.
+
foo
+````````````````````````````````
-Closing code fences cannot have [info strings]:
+The closing code fence must use the same character as the opening
+fence:
```````````````````````````````` example
```
-``` aaa
+aaa
+~~~
```
.
-
``` aaa
+
aaa
+~~~
````````````````````````````````
+```````````````````````````````` example
+~~~
+aaa
+```
+~~~
+.
+
aaa
+```
+
+````````````````````````````````
-## HTML blocks
-An [HTML block](@) is a group of lines that is treated
-as raw HTML (and will not be escaped in HTML output).
+The closing code fence must be at least as long as the opening fence:
-There are seven kinds of [HTML block], which can be defined by their
-start and end conditions. The block begins with a line that meets a
-[start condition](@) (after up to three spaces optional indentation).
-It ends with the first subsequent line that meets a matching [end
-condition](@), or the last line of the document, or the last line of
-the [container block](#container-blocks) containing the current HTML
-block, if no line is encountered that meets the [end condition]. If
-the first line meets both the [start condition] and the [end
-condition], the block will contain just that line.
+```````````````````````````````` example
+````
+aaa
+```
+``````
+.
+
aaa
+```
+
+````````````````````````````````
-1. **Start condition:** line begins with the string ``, `
`, or `` (case-insensitive; it
-need not match the start tag).
-2. **Start condition:** line begins with the string ``.
+```````````````````````````````` example
+~~~~
+aaa
+~~~
+~~~~
+.
+
aaa
+~~~
+
+````````````````````````````````
-3. **Start condition:** line begins with the string ``.\
-**End condition:** line contains the string `?>`.
-4. **Start condition:** line begins with the string ``.
+Unclosed code blocks are closed by the end of the document
+(or the enclosing [block quote][block quotes] or [list item][list items]):
-5. **Start condition:** line begins with the string
-``.
+```````````````````````````````` example
+```
+.
+
+````````````````````````````````
-6. **Start condition:** line begins the string `<` or `</`
-followed by one of the strings (case-insensitive) `address`,
-`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
-`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
-`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
-`footer`, `form`, `frame`, `frameset`,
-`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
-`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
-`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
-`section`, `source`, `summary`, `table`, `tbody`, `td`,
-`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
-by [whitespace], the end of the line, the string `>`, or
-the string `/>`.\
-**End condition:** line is followed by a [blank line].
-7. **Start condition:** line begins with a complete [open tag]
-(with any [tag name] other than `script`,
-`style`, or `pre`) or a complete [closing tag],
-followed only by [whitespace] or the end of the line.\
-**End condition:** line is followed by a [blank line].
+```````````````````````````````` example
+`````
-HTML blocks continue until they are closed by their appropriate
-[end condition], or the last line of the document or other [container
-block](#container-blocks). This means any HTML **within an HTML
-block** that might otherwise be recognised as a start condition will
-be ignored by the parser and passed through as-is, without changing
-the parser's state.
+```
+aaa
+.
+
` will not affect
-the parser state; as the HTML block was started in by start condition 6, it
-will end at any blank line. This can be surprising:
```````````````````````````````` example
-
-
-**Hello**,
+> ```
+> aaa
-_world_.
-
-
+bbb
.
-
-
-**Hello**,
-
world.
-
-
+
+
aaa
+
+
+
bbb
````````````````````````````````
-In this case, the HTML block is terminated by the newline — the `**Hello**`
-text remains verbatim — and regular parsing resumes, with a paragraph,
-emphasised `world` and inline and block HTML following.
-
-All types of [HTML blocks] except type 7 may interrupt
-a paragraph. Blocks of type 7 may not interrupt a paragraph.
-(This restriction is intended to prevent unwanted interpretation
-of long tags inside a wrapped paragraph as starting HTML blocks.)
-Some simple examples follow. Here are some basic HTML blocks
-of type 6:
+A code block can have all empty lines as its content:
```````````````````````````````` example
-
-
-
- hi
-
-
-
-
-okay.
-.
-
-
-
- hi
-
-
-
-
okay.
-````````````````````````````````
-
+```
-```````````````````````````````` example
-
````````````````````````````````
-Here we have two HTML blocks with a Markdown paragraph between them:
+Fences can be indented. If the opening fence is indented,
+content lines will have equivalent opening indentation removed,
+if present:
```````````````````````````````` example
-
-
-*Markdown*
-
-
+ ```
+ aaa
+aaa
+```
.
-
-
Markdown
-
+
aaa
+aaa
+
````````````````````````````````
-The tag on the first line can be partial, as long
-as it is split where there would be whitespace:
-
```````````````````````````````` example
-
-
+ ```
+aaa
+ aaa
+aaa
+ ```
.
-
-
+
aaa
+aaa
+aaa
+
````````````````````````````````
```````````````````````````````` example
-
-
+ ```
+ aaa
+ aaa
+ aaa
+ ```
.
-
-
+
aaa
+ aaa
+aaa
+
````````````````````````````````
-An open tag need not be closed:
-```````````````````````````````` example
-
-*foo*
+Four spaces of indentation is too many:
-*bar*
+```````````````````````````````` example
+ ```
+ aaa
+ ```
.
-
-*foo*
-
bar
+
```
+aaa
+```
+
````````````````````````````````
-
-A partial tag need not even be completed (garbage
-in, garbage out):
+Closing fences may be preceded by up to three spaces of indentation, and their
+indentation need not match that of the opening fence:
```````````````````````````````` example
-
aaa
+
````````````````````````````````
```````````````````````````````` example
-
aaa
+
````````````````````````````````
-The initial tag doesn't even need to be a valid
-tag, as long as it starts like one:
+This is not a closing fence, because it is indented 4 spaces:
```````````````````````````````` example
-
aaa
+ ```
+
````````````````````````````````
-In type 6 blocks, the initial tag need not be on a line by
-itself:
+
+Code fences (opening and closing) cannot contain internal spaces or tabs:
```````````````````````````````` example
-
````````````````````````````````
```````````````````````````````` example
-
-foo
-
+~~~~~~
+aaa
+~~~ ~~
.
-
-foo
-
+
aaa
+~~~ ~~
+
````````````````````````````````
-Everything until the next blank line or end of document
-gets included in the HTML block. So, in the following
-example, what looks like a Markdown code block
-is actually part of the HTML block, which continues until a blank
-line or the end of the document is reached:
+Fenced code blocks can interrupt paragraphs, and can be followed
+directly by paragraphs, without a blank line between:
```````````````````````````````` example
-
-``` c
-int x = 33;
+foo
```
-.
-
-``` c
-int x = 33;
+bar
```
+baz
+.
+
foo
+
bar
+
+
baz
````````````````````````````````
-To start an [HTML block] with a tag that is *not* in the
-list of block-level tags in (6), you must put the tag by
-itself on the first line (and it must be complete):
+Other blocks can also occur before and after fenced code blocks
+without an intervening blank line:
```````````````````````````````` example
-
-*bar*
-
+foo
+---
+~~~
+bar
+~~~
+# baz
.
-
-*bar*
-
+
foo
+
bar
+
+
baz
````````````````````````````````
-In type 7 blocks, the [tag name] can be anything:
+An [info string] can be provided after the opening code fence.
+Although this spec doesn't mandate any particular treatment of
+the info string, the first word is typically used to specify
+the language of the code block. In HTML output, the language is
+normally indicated by adding a class to the `code` element consisting
+of `language-` followed by the language name.
```````````````````````````````` example
-
-*bar*
-
+```ruby
+def foo(x)
+ return 3
+end
+```
.
-
-*bar*
-
+
````````````````````````````````
-These rules are designed to allow us to work with tags that
-can function as either block-level or inline-level tags.
-The `<del>` tag is a nice example. We can surround content with
-`<del>` tags in three different ways. In this case, we get a raw
-HTML block, because the `<del>` tag is on a line by itself:
+[Info strings] for backtick code blocks cannot contain backticks:
```````````````````````````````` example
-
-*foo*
-
+``` aa ```
+foo
.
-
-*foo*
-
+
aa
+foo
````````````````````````````````
-In this case, we get a raw HTML block that just includes
-the `<del>` tag (because it ends with the following blank
-line). So the contents get interpreted as CommonMark:
+[Info strings] for tilde code blocks can contain backticks and tildes:
```````````````````````````````` example
-
-
-*foo*
-
-
+~~~ aa ``` ~~~
+foo
+~~~
.
-
-
foo
-
+
foo
+
````````````````````````````````
-Finally, in this case, the `<del>` tags are interpreted
-as [raw HTML] *inside* the CommonMark paragraph. (Because
-the tag is not on a line by itself, we get inline HTML
-rather than an [HTML block].)
+Closing code fences cannot have [info strings]:
```````````````````````````````` example
-*foo*
+```
+``` aaa
+```
.
-
foo
+
``` aaa
+
````````````````````````````````
-HTML tags designed to contain literal content
-(`script`, `style`, `pre`), comments, processing instructions,
-and declarations are treated somewhat differently.
-Instead of ending at the first blank line, these blocks
-end at the first line containing a corresponding end tag.
-As a result, these blocks can contain blank lines:
-A pre tag (type 1):
+## HTML blocks
-```````````````````````````````` example
-
-import Text.HTML.TagSoup
+An [HTML block](@) is a group of lines that is treated
+as raw HTML (and will not be escaped in HTML output).
-main :: IO ()
-main = print $ parseTags tags
-
-okay
-.
-
-import Text.HTML.TagSoup
+There are seven kinds of [HTML block], which can be defined by their
+start and end conditions. The block begins with a line that meets a
+[start condition](@) (after up to three optional spaces of indentation).
+It ends with the first subsequent line that meets a matching
+[end condition](@), or the last line of the document, or the last line of
+the [container block](#container-blocks) containing the current HTML
+block, if no line is encountered that meets the [end condition]. If
+the first line meets both the [start condition] and the [end
+condition], the block will contain just that line.
-main :: IO ()
-main = print $ parseTags tags
-
-
okay
-````````````````````````````````
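+1. **Start condition:** line begins with the string `<pre`,
+`<script`, `<style`, or `<textarea` (case-insensitive), followed by a space,
+a tab, the string `>`, or the end of the line.\
+**End condition:** line contains an end tag
+`</pre>`, `</script>`, `</style>`, or `</textarea>` (case-insensitive; it
+need not match the start tag).
+2. **Start condition:** line begins with the string `<!--`.\
+**End condition:** line contains the string `-->`.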
-A script tag (type 1):
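+3. **Start condition:** line begins with the string `<?`.\
+**End condition:** line contains the string `?>`.
+4. **Start condition:** line begins with the string `<!`
+followed by an ASCII letter.\
+**End condition:** line contains the character `>`.
+5. **Start condition:** line begins with the string
+`<![CDATA[`.\
+**End condition:** line contains the string `]]>`.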
-```````````````````````````````` example
-
-okay
-.
-
-
okay
-````````````````````````````````
+6. **Start condition:** line begins with the string `<` or `</`
+followed by one of the strings (case-insensitive) `address`,
+`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
+`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
+`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
+`footer`, `form`, `frame`, `frameset`,
+`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
+`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
+`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
+`search`, `section`, `summary`, `table`, `tbody`, `td`,
+`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
+by a space, a tab, the end of the line, the string `>`, or
+the string `/>`.\
+**End condition:** line is followed by a [blank line].
+7. **Start condition:** line begins with a complete [open tag]
+(with any [tag name] other than `pre`, `script`,
+`style`, or `textarea`) or a complete [closing tag],
+followed by zero or more spaces and tabs, followed by the end of the line.\
+**End condition:** line is followed by a [blank line].
-A style tag (type 1):
+HTML blocks continue until they are closed by their appropriate
+[end condition], or the last line of the document or other [container
+block](#container-blocks). This means any HTML **within an HTML
+block** that might otherwise be recognised as a start condition will
+be ignored by the parser and passed through as-is, without changing
+the parser's state.
+
+For instance, `<pre>` within an HTML block started by `<table>` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
```````````````````````````````` example
-
-okay
+_world_.
+
+
.
-
-
okay
+
+
+**Hello**,
+
world.
+
+
````````````````````````````````
+In this case, the HTML block is terminated by the blank line — the `**Hello**`
+text remains verbatim — and regular parsing resumes, with a paragraph,
+emphasised `world` and inline and block HTML following.
-If there is no matching end tag, the block will end at the
-end of the document (or the enclosing [block quote][block quotes]
-or [list item][list items]):
-
-```````````````````````````````` example
-
+
*foo*
.
-
-
foo
+
+*foo*
````````````````````````````````
+Here we have two HTML blocks with a Markdown paragraph between them:
+
```````````````````````````````` example
-*bar*
-*baz*
+
+
+*Markdown*
+
+
.
-*bar*
-
baz
+
+
Markdown
+
````````````````````````````````
-Note that anything on the last line after the
-end tag will be included in the [HTML block]:
+The tag on the first line can be partial, as long
+as it is split where there would be whitespace:
```````````````````````````````` example
-1. *bar*
+
+
.
-1. *bar*
+
+
````````````````````````````````
-A comment (type 2):
+```````````````````````````````` example
+
+
+.
+
+
+````````````````````````````````
+
+An open tag need not be closed:
```````````````````````````````` example
-
-okay
+*bar*
.
-
-
okay
+
+*foo*
+
bar
````````````````````````````````
-A processing instruction (type 3):
+A partial tag need not even be completed (garbage
+in, garbage out):
```````````````````````````````` example
-';
-
-?>
-okay
+
````````````````````````````````
```````````````````````````````` example
-
-
-
+
+foo
+
.
-
-
<div>
-
+
+foo
+
````````````````````````````````
-An HTML block of types 1--6 can interrupt a paragraph, and need not be
-preceded by a blank line.
+Everything until the next blank line or end of document
+gets included in the HTML block. So, in the following
+example, what looks like a Markdown code block
+is actually part of the HTML block, which continues until a blank
+line or the end of the document is reached:
```````````````````````````````` example
-Foo
-
-bar
-
+
+``` c
+int x = 33;
+```
.
-
Foo
-
-bar
-
+
+``` c
+int x = 33;
+```
````````````````````````````````
-However, a following blank line is needed, except at the end of
-a document, and except for blocks of types 1--5, [above][HTML
-block]:
+To start an [HTML block] with a tag that is *not* in the
+list of block-level tags in (6), you must put the tag by
+itself on the first line (and it must be complete):
```````````````````````````````` example
-
-*foo*
+
+*bar*
+
````````````````````````````````
-HTML blocks of type 7 cannot interrupt a paragraph:
+In type 7 blocks, the [tag name] can be anything:
```````````````````````````````` example
-Foo
-
-baz
+
+*bar*
+
.
-
+
+*bar*
+
````````````````````````````````
-This rule differs from John Gruber's original Markdown syntax
-specification, which says:
+```````````````````````````````` example
+
+*bar*
+
+.
+
+*bar*
+
+````````````````````````````````
-> The only restrictions are that block-level HTML elements —
-> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
-> surrounding content by blank lines, and the start and end tags of the
-> block should not be indented with tabs or spaces.
-In some ways Gruber's rule is more restrictive than the one given
-here:
+```````````````````````````````` example
+
+*bar*
+.
+
+*bar*
+````````````````````````````````
-- It requires that an HTML block be preceded by a blank line.
-- It does not allow the start tag to be indented.
-- It requires a matching end tag, which it also does not allow to
- be indented.
-Most Markdown implementations (including some of Gruber's own) do not
-respect all of these restrictions.
+These rules are designed to allow us to work with tags that
+can function as either block-level or inline-level tags.
+The `<del>` tag is a nice example. We can surround content with
+`<del>` tags in three different ways. In this case, we get a raw
+HTML block, because the `<del>` tag is on a line by itself:
-There is one respect, however, in which Gruber's rule is more liberal
-than the one given here, since it allows blank lines to occur inside
-an HTML block. There are two reasons for disallowing them here.
-First, it removes the need to parse balanced tags, which is
-expensive and can require backtracking from the end of the document
-if no matching end tag is found. Second, it provides a very simple
-and flexible way of including Markdown content inside HTML tags:
-simply separate the Markdown from the HTML using blank lines:
+```````````````````````````````` example
+
+*foo*
+
+.
+
+*foo*
+
+````````````````````````````````
-Compare:
+
+In this case, we get a raw HTML block that just includes
+the `<del>` tag (because it ends with the following blank
+line). So the contents get interpreted as CommonMark:
```````````````````````````````` example
-
+
-*Emphasized* text.
+*foo*
-
+
.
-
-
Emphasized text.
-
+
+
foo
+
````````````````````````````````
+Finally, in this case, the `<del>` tags are interpreted
+as [raw HTML] *inside* the CommonMark paragraph. (Because
+the tag is not on a line by itself, we get inline HTML
+rather than an [HTML block].)
+
```````````````````````````````` example
-
-*Emphasized* text.
-
+*foo*
.
-
-*Emphasized* text.
-
+
foo
````````````````````````````````
-Some Markdown implementations have adopted a convention of
-interpreting content inside tags as text if the open tag has
-the attribute `markdown=1`. The rule given above seems a simpler and
-more elegant way of achieving the same expressive power, which is also
-much simpler to parse.
+HTML tags designed to contain literal content
+(`pre`, `script`, `style`, `textarea`), comments, processing instructions,
+and declarations are treated somewhat differently.
+Instead of ending at the first blank line, these blocks
+end at the first line containing a corresponding end tag.
+As a result, these blocks can contain blank lines:
-The main potential drawback is that one can no longer paste HTML
-blocks into Markdown documents with 100% reliability. However,
-*in most cases* this will work fine, because the blank lines in
-HTML are usually followed by HTML block tags. For example:
+A pre tag (type 1):
```````````````````````````````` example
-
+A script tag (type 1):
+
+```````````````````````````````` example
+
+okay
.
-
-
-
-Hi
-
-
-
+
+
okay
````````````````````````````````
-There are problems, however, if the inner tags are indented
-*and* separated by spaces, as then they will be interpreted as
-an indented code block:
+A textarea tag (type 1):
```````````````````````````````` example
-
+
+
.
-
-
-
<td>
- Hi
-</td>
-
-
-
-````````````````````````````````
-
+
+````````````````````````````````
-A [link reference definition]
-does not correspond to a structural element of a document. Instead, it
-defines a label which can be used in [reference links]
-and reference-style [images] elsewhere in the document. [Link
-reference definitions] can come either before or after the links that use
-them.
+A style tag (type 1):
```````````````````````````````` example
-[foo]: /url "title"
+
+okay
.
-
````````````````````````````````
+If there is no matching end tag, the block will end at the
+end of the document (or the enclosing [block quote][block quotes]
+or [list item][list items]):
+
```````````````````````````````` example
- [foo]:
- /url
- 'the title'
+
+*foo*
.
-
````````````````````````````````
-However, it may not contain a [blank line]:
-
```````````````````````````````` example
-[foo]: /url 'title
-
-with blank line'
-
-[foo]
+*bar*
+*baz*
.
-
[foo]: /url 'title
-
with blank line'
-
[foo]
+*bar*
+
baz
````````````````````````````````
-The title may be omitted:
+Note that anything on the last line after the
+end tag will be included in the [HTML block]:
```````````````````````````````` example
-[foo]:
-/url
-
-[foo]
+1. *bar*
.
-
+1. *bar*
````````````````````````````````
-The link destination may not be omitted:
+A comment (type 2):
```````````````````````````````` example
-[foo]:
+
+okay
.
-
[foo]:
-
[foo]
-````````````````````````````````
+
+
okay
+````````````````````````````````
-```````````````````````````````` example
-[foo]: <>
-[foo]
-.
-
-````````````````````````````````
-The title must be separated from the link destination by
-whitespace:
+A processing instruction (type 3):
```````````````````````````````` example
-[foo]: (baz)
+';
+
+?>
+okay
.
-
[foo]: (baz)
-
[foo]
+';
+
+?>
+
okay
````````````````````````````````
-Both title and destination can contain backslash escapes
-and literal backslashes:
+A declaration (type 4):
```````````````````````````````` example
-[foo]: /url\bar\*baz "foo\"bar\baz"
-
-[foo]
+
.
-
+
````````````````````````````````
-A link can come before its corresponding definition:
+CDATA (type 5):
```````````````````````````````` example
-[foo]
-
-[foo]: url
-.
-
````````````````````````````````
-As noted in the section on [Links], matching of labels is
-case-insensitive (see [matches]).
+The opening tag can be preceded by up to three spaces of indentation, but not
+four:
```````````````````````````````` example
-[FOO]: /url
+
-[Foo]
+
.
-
````````````````````````````````
-Here is a link reference definition with no corresponding link.
-It contributes nothing to the document.
+An HTML block of types 1--6 can interrupt a paragraph, and need not be
+preceded by a blank line.
```````````````````````````````` example
-[foo]: /url
+Foo
+
+bar
+
.
+
Foo
+
+bar
+
````````````````````````````````
-Here is another one:
+However, a following blank line is needed, except at the end of
+a document, and except for blocks of types 1--5, [above][HTML
+block]:
```````````````````````````````` example
-[
-foo
-]: /url
+
bar
+
+*foo*
.
-
bar
+
+bar
+
+*foo*
````````````````````````````````
-This is not a link reference definition, because there are
-[non-whitespace characters] after the title:
+HTML blocks of type 7 cannot interrupt a paragraph:
```````````````````````````````` example
-[foo]: /url "title" ok
+Foo
+
+baz
.
-
````````````````````````````````
-This is a link reference definition, but it has no title:
-
-```````````````````````````````` example
-[foo]: /url
-"title" ok
-.
-
"title" ok
-````````````````````````````````
+This rule differs from John Gruber's original Markdown syntax
+specification, which says:
+> The only restrictions are that block-level HTML elements —
+> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
+> surrounding content by blank lines, and the start and end tags of the
+> block should not be indented with spaces or tabs.
-This is not a link reference definition, because it is indented
-four spaces:
+In some ways Gruber's rule is more restrictive than the one given
+here:
-```````````````````````````````` example
- [foo]: /url "title"
+- It requires that an HTML block be preceded by a blank line.
+- It does not allow the start tag to be indented.
+- It requires a matching end tag, which it also does not allow to
+ be indented.
-[foo]
-.
-
[foo]: /url "title"
-
-
[foo]
-````````````````````````````````
+Most Markdown implementations (including some of Gruber's own) do not
+respect all of these restrictions.
+There is one respect, however, in which Gruber's rule is more liberal
+than the one given here, since it allows blank lines to occur inside
+an HTML block. There are two reasons for disallowing them here.
+First, it removes the need to parse balanced tags, which is
+expensive and can require backtracking from the end of the document
+if no matching end tag is found. Second, it provides a very simple
+and flexible way of including Markdown content inside HTML tags:
+simply separate the Markdown from the HTML using blank lines:
-This is not a link reference definition, because it occurs inside
-a code block:
+Compare:
```````````````````````````````` example
-```
-[foo]: /url
-```
+
-[foo]
+*Emphasized* text.
+
+
.
-
[foo]: /url
-
-
[foo]
+
+
Emphasized text.
+
````````````````````````````````
-A [link reference definition] cannot interrupt a paragraph.
-
```````````````````````````````` example
-Foo
-[bar]: /baz
-
-[bar]
+
+*Emphasized* text.
+
.
-
Foo
-[bar]: /baz
-
[bar]
+
+*Emphasized* text.
+
````````````````````````````````
-However, it can directly follow other block elements, such as headings
-and thematic breaks, and it need not be followed by a blank line.
-
-```````````````````````````````` example
-# [Foo]
-[foo]: /url
-> bar
-.
-
-````````````````````````````````
+Some Markdown implementations have adopted a convention of
+interpreting content inside tags as text if the open tag has
+the attribute `markdown=1`. The rule given above seems a simpler and
+more elegant way of achieving the same expressive power, which is also
+much simpler to parse.
-```````````````````````````````` example
-[foo]: /url
-bar
-===
-[foo]
-.
-
-````````````````````````````````
+The main potential drawback is that one can no longer paste HTML
+blocks into Markdown documents with 100% reliability. However,
+*in most cases* this will work fine, because the blank lines in
+HTML are usually followed by HTML block tags. For example:
```````````````````````````````` example
-[foo]: /url
-===
-[foo]
-.
-
````````````````````````````````
-[Link reference definitions] can occur
-inside block containers, like lists and block quotations. They
-affect the entire document, not just the container in which they
-are defined:
+There are problems, however, if the inner tags are indented
+*and* separated by spaces, as then they will be interpreted as
+an indented code block:
```````````````````````````````` example
-[foo]
+
-Whether something is a [link reference definition] is
-independent of whether the link reference it defines is
-used in the document. Thus, for example, the following
-document contains just a link reference definition, and
-no visible content:
+
-```````````````````````````````` example
-[foo]: /url
+
.
-````````````````````````````````
+
+
+
<td>
+ Hi
+</td>
+
+
+
+````````````````````````````````
-## Paragraphs
+Fortunately, blank lines are usually not necessary and can be
+deleted. The exception is inside `<pre>` tags, but as described
+[above][HTML blocks], raw HTML blocks starting with `<pre>`
+*can* contain blank lines.
-A sequence of non-blank lines that cannot be interpreted as other
-kinds of blocks forms a [paragraph](@).
-The contents of the paragraph are the result of parsing the
-paragraph's raw content as inlines. The paragraph's raw content
-is formed by concatenating the lines and removing initial and final
-[whitespace].
+## Link reference definitions
-A simple example with two paragraphs:
+A [link reference definition](@)
+consists of a [link label], optionally preceded by up to three spaces of
+indentation, followed
+by a colon (`:`), optional spaces or tabs (including up to one
+[line ending]), a [link destination],
+optional spaces or tabs (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by spaces or tabs.
+No further character may occur.
+
+A [link reference definition]
+does not correspond to a structural element of a document. Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document. [Link
+reference definitions] can come either before or after the links that use
+them.
```````````````````````````````` example
-aaa
+[foo]: /url "title"
-bbb
+[foo]
.
-
````````````````````````````````
-Lines after the first may be indented any amount, since indented
-code blocks cannot interrupt paragraphs.
+The title may extend over multiple lines:
```````````````````````````````` example
-aaa
- bbb
- ccc
+[foo]: /url '
+title
+line1
+line2
+'
+
+[foo]
.
-
````````````````````````````````
-However, the first line may be indented at most three spaces,
-or an indented code block will be triggered:
+However, it may not contain a [blank line]:
```````````````````````````````` example
- aaa
-bbb
+[foo]: /url 'title
+
+with blank line'
+
+[foo]
.
-
aaa
-bbb
+
[foo]: /url 'title
+
with blank line'
+
[foo]
````````````````````````````````
+The title may be omitted:
+
```````````````````````````````` example
- aaa
-bbb
+[foo]:
+/url
+
+[foo]
.
-
````````````````````````````````
-Final spaces are stripped before inline parsing, so a paragraph
-that ends with two or more spaces will not end with a [hard line
-break]:
+The link destination may not be omitted:
```````````````````````````````` example
-aaa
-bbb
+[foo]:
+
+[foo]
.
-
aaa
-bbb
+
[foo]:
+
[foo]
````````````````````````````````
+ However, an empty link destination may be specified using
+ angle brackets:
-## Blank lines
+```````````````````````````````` example
+[foo]: <>
-[Blank lines] between block-level elements are ignored,
-except for the role they play in determining whether a [list]
-is [tight] or [loose].
+[foo]
+.
+
+````````````````````````````````
-Blank lines at the beginning and end of the document are also ignored.
+The title must be separated from the link destination by
+spaces or tabs:
```````````````````````````````` example
-
-
-aaa
-
-
-# aaa
+[foo]: (baz)
-
+[foo]
.
-
aaa
-
aaa
+
[foo]: (baz)
+
[foo]
````````````````````````````````
+Both title and destination can contain backslash escapes
+and literal backslashes:
-# Container blocks
+```````````````````````````````` example
+[foo]: /url\bar\*baz "foo\"bar\baz"
-A [container block](#container-blocks) is a block that has other
-blocks as its contents. There are two basic kinds of container blocks:
-[block quotes] and [list items].
-[Lists] are meta-containers for [list items].
+[foo]
+.
+
+````````````````````````````````
-We define the syntax for container blocks recursively. The general
-form of the definition is:
-> If X is a sequence of blocks, then the result of
-> transforming X in such-and-such a way is a container of type Y
-> with these blocks as its content.
+A link can come before its corresponding definition:
-So, we explain what counts as a block quote or list item by explaining
-how these can be *generated* from their contents. This should suffice
-to define the syntax, although it does not give a recipe for *parsing*
-these constructions. (A recipe is provided below in the section entitled
-[A parsing strategy](#appendix-a-parsing-strategy).)
+```````````````````````````````` example
+[foo]
-## Block quotes
+[foo]: url
+.
+
+````````````````````````````````
-A [block quote marker](@)
-consists of 0-3 spaces of initial indent, plus (a) the character `>` together
-with a following space, or (b) a single character `>` not followed by a space.
-The following rules define [block quotes]:
+If there are several matching definitions, the first one takes
+precedence:
-1. **Basic case.** If a string of lines *Ls* constitute a sequence
- of blocks *Bs*, then the result of prepending a [block quote
- marker] to the beginning of each line in *Ls*
- is a [block quote](#block-quotes) containing *Bs*.
+```````````````````````````````` example
+[foo]
-2. **Laziness.** If a string of lines *Ls* constitute a [block
- quote](#block-quotes) with contents *Bs*, then the result of deleting
- the initial [block quote marker] from one or
- more lines in which the next [non-whitespace character] after the [block
- quote marker] is [paragraph continuation
- text] is a block quote with *Bs* as its content.
- [Paragraph continuation text](@) is text
- that will be parsed as part of the content of a paragraph, but does
- not occur at the beginning of the paragraph.
+[foo]: first
+[foo]: second
+.
+
+````````````````````````````````
-3. **Consecutiveness.** A document cannot contain two [block
- quotes] in a row unless there is a [blank line] between them.
-Nothing else counts as a [block quote](#block-quotes).
+As noted in the section on [Links], matching of labels is
+case-insensitive (see [matches]).
+
+```````````````````````````````` example
+[FOO]: /url
+
+[Foo]
+.
+
````````````````````````````````
-The spaces after the `>` characters can be omitted:
+Whether something is a [link reference definition] is
+independent of whether the link reference it defines is
+used in the document. Thus, for example, the following
+document contains just a link reference definition, and
+no visible content:
```````````````````````````````` example
-># Foo
->bar
-> baz
+[foo]: /url
.
-
-
Foo
-
bar
-baz
-
````````````````````````````````
-The `>` characters can be indented 1-3 spaces:
+Here is another one:
```````````````````````````````` example
- > # Foo
- > bar
- > baz
-.
-
-
Foo
-
bar
-baz
-
+[
+foo
+]: /url
+bar
+.
+
bar
````````````````````````````````
-Four spaces gives us a code block:
+This is not a link reference definition, because there are
+characters other than spaces or tabs after the title:
```````````````````````````````` example
- > # Foo
- > bar
- > baz
+[foo]: /url "title" ok
.
-
> # Foo
-> bar
-> baz
-
+
[foo]: /url "title" ok
````````````````````````````````
-The Laziness clause allows us to omit the `>` before
-[paragraph continuation text]:
+This is a link reference definition, but it has no title:
```````````````````````````````` example
-> # Foo
-> bar
-baz
+[foo]: /url
+"title" ok
.
-
-
Foo
-
bar
-baz
-
+
"title" ok
````````````````````````````````
-A block quote can contain some lazy and some non-lazy
-continuation lines:
+This is not a link reference definition, because it is indented
+four spaces:
```````````````````````````````` example
-> bar
-baz
-> foo
+ [foo]: /url "title"
+
+[foo]
.
-
-
bar
-baz
-foo
-
+
[foo]: /url "title"
+
+
[foo]
````````````````````````````````
-Laziness only applies to lines that would have been continuations of
-paragraphs had they been prepended with [block quote markers].
-For example, the `> ` cannot be omitted in the second line of
+This is not a link reference definition, because it occurs inside
+a code block:
-``` markdown
-> foo
-> ---
+```````````````````````````````` example
+```
+[foo]: /url
```
-without changing the meaning:
-
-```````````````````````````````` example
-> foo
----
+[foo]
.
-
-
foo
-
-
+
[foo]: /url
+
+
[foo]
````````````````````````````````
-Similarly, if we omit the `> ` in the second line of
-
-``` markdown
-> - foo
-> - bar
-```
-
-then the block quote ends after the first line:
+A [link reference definition] cannot interrupt a paragraph.
```````````````````````````````` example
-> - foo
-- bar
+Foo
+[bar]: /baz
+
+[bar]
.
-
-
-
foo
-
-
-
-
bar
-
+
Foo
+[bar]: /baz
+
[bar]
````````````````````````````````
-For the same reason, we can't omit the `> ` in front of
-subsequent lines of an indented or fenced code block:
+However, it can directly follow other block elements, such as headings
+and thematic breaks, and it need not be followed by a blank line.
```````````````````````````````` example
-> foo
- bar
+# [Foo]
+[foo]: /url
+> bar
.
+
````````````````````````````````
-
-Note that in the following case, we have a [lazy
-continuation line]:
-
```````````````````````````````` example
-> foo
- - bar
+[foo]: /url
+===
+[foo]
.
-
````````````````````````````````
-To see why, note that in
+Several [link reference definitions]
+can occur one after another, without intervening blank lines.
-```markdown
-> foo
-> - bar
-```
+```````````````````````````````` example
+[foo]: /foo-url "foo"
+[bar]: /bar-url
+ "bar"
+[baz]: /baz-url
-the `- bar` is indented too far to start a list, and can't
-be an indented code block because indented code blocks cannot
-interrupt paragraphs, so it is [paragraph continuation text].
+[foo],
+[bar],
+[baz]
+.
+
+````````````````````````````````
-A block quote can be empty:
+
+[Link reference definitions] can occur
+inside block containers, like lists and block quotations. They
+affect the entire document, not just the container in which they
+are defined:
```````````````````````````````` example
->
+[foo]
+
+> [foo]: /url
.
+
````````````````````````````````
+## Paragraphs
+
+A sequence of non-blank lines that cannot be interpreted as other
+kinds of blocks forms a [paragraph](@).
+The contents of the paragraph are the result of parsing the
+paragraph's raw content as inlines. The paragraph's raw content
+is formed by concatenating the lines and removing initial and final
+spaces or tabs.
+
+A simple example with two paragraphs:
+
```````````````````````````````` example
->
->
->
+aaa
+
+bbb
.
-
-
+
aaa
+
bbb
````````````````````````````````
-A block quote can have initial or final blank lines:
+Paragraphs can contain multiple lines, but no blank lines:
```````````````````````````````` example
->
-> foo
->
+aaa
+bbb
+
+ccc
+ddd
.
-
-
foo
-
+
aaa
+bbb
+
ccc
+ddd
````````````````````````````````
-A blank line always separates block quotes:
+Multiple blank lines between paragraphs have no effect:
```````````````````````````````` example
-> foo
+aaa
-> bar
+
+bbb
.
-
-
foo
-
-
-
bar
-
+
aaa
+
bbb
````````````````````````````````
-(Most current Markdown implementations, including John Gruber's
-original `Markdown.pl`, will parse this example as a single block quote
-with two paragraphs. But it seems better to allow the author to decide
-whether two block quotes or one are wanted.)
-
-Consecutiveness means that if we put these block quotes together,
-we get a single block quote:
+Leading spaces or tabs are skipped:
```````````````````````````````` example
-> foo
-> bar
+ aaa
+ bbb
.
-
-
foo
-bar
-
+
aaa
+bbb
````````````````````````````````
-To get a block quote with two paragraphs, use:
+Lines after the first may be indented any amount, since indented
+code blocks cannot interrupt paragraphs.
```````````````````````````````` example
-> foo
->
-> bar
+aaa
+ bbb
+ ccc
.
-
-
foo
-
bar
-
+
aaa
+bbb
+ccc
````````````````````````````````
-Block quotes can interrupt paragraphs:
+However, the first line may be preceded by up to three spaces of indentation.
+Four spaces of indentation is too many:
```````````````````````````````` example
-foo
+ aaa
+bbb
+.
+
aaa
+bbb
+````````````````````````````````
+
+
+```````````````````````````````` example
+ aaa
+bbb
+.
+
aaa
+
+
bbb
+````````````````````````````````
+
+
+Final spaces or tabs are stripped before inline parsing, so a paragraph
+that ends with two or more spaces will not end with a [hard line
+break]:
+
+```````````````````````````````` example
+aaa
+bbb
+.
+
aaa
+bbb
+````````````````````````````````
+
+
+## Blank lines
+
+[Blank lines] between block-level elements are ignored,
+except for the role they play in determining whether a [list]
+is [tight] or [loose].
+
+Blank lines at the beginning and end of the document are also ignored.
+
+```````````````````````````````` example
+
+
+aaa
+
+
+# aaa
+
+
+.
+
aaa
+
aaa
+````````````````````````````````
+
+
+
+# Container blocks
+
+A [container block](#container-blocks) is a block that has other
+blocks as its contents. There are two basic kinds of container blocks:
+[block quotes] and [list items].
+[Lists] are meta-containers for [list items].
+
+We define the syntax for container blocks recursively. The general
+form of the definition is:
+
+> If X is a sequence of blocks, then the result of
+> transforming X in such-and-such a way is a container of type Y
+> with these blocks as its content.
+
+So, we explain what counts as a block quote or list item by explaining
+how these can be *generated* from their contents. This should suffice
+to define the syntax, although it does not give a recipe for *parsing*
+these constructions. (A recipe is provided below in the section entitled
+[A parsing strategy](#appendix-a-parsing-strategy).)
+
+## Block quotes
+
+A [block quote marker](@),
+optionally preceded by up to three spaces of indentation,
+consists of (a) the character `>` together with a following space of
+indentation, or (b) a single character `>` not followed by a space of
+indentation.
+
+The following rules define [block quotes]:
+
+1. **Basic case.** If a string of lines *Ls* constitute a sequence
+ of blocks *Bs*, then the result of prepending a [block quote
+ marker] to the beginning of each line in *Ls*
+ is a [block quote](#block-quotes) containing *Bs*.
+
+2. **Laziness.** If a string of lines *Ls* constitute a [block
+ quote](#block-quotes) with contents *Bs*, then the result of deleting
+ the initial [block quote marker] from one or
+ more lines in which the next character other than a space or tab after the
+ [block quote marker] is [paragraph continuation
+ text] is a block quote with *Bs* as its content.
+ [Paragraph continuation text](@) is text
+ that will be parsed as part of the content of a paragraph, but does
+ not occur at the beginning of the paragraph.
+
+3. **Consecutiveness.** A document cannot contain two [block
+ quotes] in a row unless there is a [blank line] between them.
+
+Nothing else counts as a [block quote](#block-quotes).
+
+Here is a simple example:
+
+```````````````````````````````` example
+> # Foo
> bar
+> baz
.
-
foo
-
bar
+
Foo
+
bar
+baz
````````````````````````````````
-In general, blank lines are not needed before or after block
-quotes:
+The space or tab after the `>` characters can be omitted:
```````````````````````````````` example
-> aaa
-***
-> bbb
+># Foo
+>bar
+> baz
.
-
aaa
-
-
-
-
bbb
+
Foo
+
bar
+baz
````````````````````````````````
-However, because of laziness, a blank line is needed between
-a block quote and a following paragraph:
+The `>` characters can be preceded by up to three spaces of indentation:
```````````````````````````````` example
-> bar
-baz
+ > # Foo
+ > bar
+ > baz
.
+
Foo
bar
baz
````````````````````````````````
+Four spaces of indentation is too many:
+
```````````````````````````````` example
-> bar
+ > # Foo
+ > bar
+ > baz
+.
+
> # Foo
+> bar
+> baz
+
+````````````````````````````````
+
+
+The Laziness clause allows us to omit the `>` before
+[paragraph continuation text]:
+```````````````````````````````` example
+> # Foo
+> bar
baz
.
-
bar
+
Foo
+
bar
+baz
-
baz
````````````````````````````````
+A block quote can contain some lazy and some non-lazy
+continuation lines:
+
```````````````````````````````` example
> bar
->
baz
+> foo
.
-
bar
+
bar
+baz
+foo
-
baz
````````````````````````````````
-It is a consequence of the Laziness rule that any number
-of initial `>`s may be omitted on a continuation line of a
-nested block quote:
+Laziness only applies to lines that would have been continuations of
+paragraphs had they been prepended with [block quote markers].
+For example, the `> ` cannot be omitted in the second line of
+
+``` markdown
+> foo
+> ---
+```
+
+without changing the meaning:
```````````````````````````````` example
-> > > foo
-bar
+> foo
+---
.
-
-
-
foo
-bar
-
-
+
foo
+
````````````````````````````````
+Similarly, if we omit the `> ` in the second line of
+
+``` markdown
+> - foo
+> - bar
+```
+
+then the block quote ends after the first line:
+
```````````````````````````````` example
->>> foo
-> bar
->>baz
+> - foo
+- bar
.
-
-
-
foo
-bar
-baz
-
-
+
+
foo
+
+
+
bar
+
````````````````````````````````
-When including an indented code block in a block quote,
-remember that the [block quote marker] includes
-both the `>` and a following space. So *five spaces* are needed after
-the `>`:
+For the same reason, we can't omit the `> ` in front of
+subsequent lines of an indented or fenced code block:
```````````````````````````````` example
-> code
-
-> not code
+> foo
+ bar
.
-
code
+
foo
+
bar
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+> ```
+foo
+```
+.
-
not code
+
+
foo
+
````````````````````````````````
+Note that in the following case, we have a [lazy
+continuation line]:
-## List items
+```````````````````````````````` example
+> foo
+ - bar
+.
+
+
foo
+- bar
+
+````````````````````````````````
+
+
+To see why, note that in
+
+```markdown
+> foo
+> - bar
+```
+
+the `- bar` is indented too far to start a list, and can't
+be an indented code block because indented code blocks cannot
+interrupt paragraphs, so it is [paragraph continuation text].
+
+A block quote can be empty:
+
+```````````````````````````````` example
+>
+.
+
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+>
+>
+>
+.
+
+
+````````````````````````````````
+
+
+A block quote can have initial or final blank lines:
+
+```````````````````````````````` example
+>
+> foo
+>
+.
+
+
foo
+
+````````````````````````````````
+
+
+A blank line always separates block quotes:
+
+```````````````````````````````` example
+> foo
+
+> bar
+.
+
+
foo
+
+
+
bar
+
+````````````````````````````````
+
+
+(Most current Markdown implementations, including John Gruber's
+original `Markdown.pl`, will parse this example as a single block quote
+with two paragraphs. But it seems better to allow the author to decide
+whether two block quotes or one are wanted.)
+
+Consecutiveness means that if we put these block quotes together,
+we get a single block quote:
+
+```````````````````````````````` example
+> foo
+> bar
+.
+
+
foo
+bar
+
+````````````````````````````````
+
+
+To get a block quote with two paragraphs, use:
+
+```````````````````````````````` example
+> foo
+>
+> bar
+.
+
+
foo
+
bar
+
+````````````````````````````````
+
+
+Block quotes can interrupt paragraphs:
+
+```````````````````````````````` example
+foo
+> bar
+.
+
foo
+
+
bar
+
+````````````````````````````````
+
+
+In general, blank lines are not needed before or after block
+quotes:
+
+```````````````````````````````` example
+> aaa
+***
+> bbb
+.
+
+
aaa
+
+
+
+
bbb
+
+````````````````````````````````
+
+
+However, because of laziness, a blank line is needed between
+a block quote and a following paragraph:
+
+```````````````````````````````` example
+> bar
+baz
+.
+
+
bar
+baz
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+> bar
+
+baz
+.
+
+
bar
+
+
baz
+````````````````````````````````
+
+
+```````````````````````````````` example
+> bar
+>
+baz
+.
+
+
bar
+
+
baz
+````````````````````````````````
+
+
+It is a consequence of the Laziness rule that any number
+of initial `>`s may be omitted on a continuation line of a
+nested block quote:
+
+```````````````````````````````` example
+> > > foo
+bar
+.
+
+
+
+
foo
+bar
+
+
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+>>> foo
+> bar
+>>baz
+.
+
+
+
+
foo
+bar
+baz
+
+
+
+````````````````````````````````
+
+
+When including an indented code block in a block quote,
+remember that the [block quote marker] includes
+both the `>` and a following space of indentation. So *five spaces* are needed
+after the `>`:
+
+```````````````````````````````` example
+> code
+
+> not code
+.
+
+
code
+
+
+
+
not code
+
+````````````````````````````````
+
+
+
+## List items
A [list marker](@) is a
[bullet list marker] or an [ordered list marker].
@@ -3755,10 +4111,10 @@ in some browsers.)
The following rules define [list items]:
1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of
- blocks *Bs* starting with a [non-whitespace character], and *M* is a
- list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result
- of prepending *M* and the following spaces to the first line of
- *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
+ blocks *Bs* starting with a character other than a space or tab, and *M* is
+ a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation,
+ then the result of prepending *M* and the following spaces to the first line
+ of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
list item with *Bs* as its contents. The type of the list item
(bullet or ordered) is determined by the type of its list marker.
If the list item is ordered, then it is also assigned a start
@@ -3823,8 +4179,8 @@ with two lines.
The most important thing to notice is that the position of
the text after the list marker determines how much indentation
is needed in subsequent blocks in the list item. If the list
-marker takes up two spaces, and there are three spaces between
-the list marker and the next [non-whitespace character], then blocks
+marker takes up two spaces of indentation, and there are three spaces between
+the list marker and the next character other than a space or tab, then blocks
must be indented five spaces in order to fall under the list
item.
@@ -3885,10 +4241,10 @@ put under the list item:
It is tempting to think of this in terms of columns: the continuation
-blocks must be indented at least to the column of the first
-[non-whitespace character] after the list marker. However, that is not quite right.
-The spaces after the list marker determine how much relative indentation
-is needed. Which column this indentation reaches will depend on
+blocks must be indented at least to the column of the first character other than
+a space or tab after the list marker. However, that is not quite right.
+The spaces of indentation after the list marker determine how much relative
+indentation is needed. Which column this indentation reaches will depend on
how the list item is embedded in other constructions, as shown by
this example:
@@ -3935,7 +4291,7 @@ far enough past the blockquote marker:
````````````````````````````````
-Note that at least one space is needed between the list marker and
+Note that at least one space or tab is needed between the list marker and
any following content, so these are not list items:
```````````````````````````````` example
@@ -4067,16 +4423,16 @@ A start number may not be negative:
2. **Item starting with indented code.** If a sequence of lines *Ls*
constitute a sequence of blocks *Bs* starting with an indented code
block, and *M* is a list marker of width *W* followed by
- one space, then the result of prepending *M* and the following
- space to the first line of *Ls*, and indenting subsequent lines of
- *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
+ one space of indentation, then the result of prepending *M* and the
+ following space to the first line of *Ls*, and indenting subsequent lines
+ of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
If a line is empty, then it need not be indented. The type of the
list item (bullet or ordered) is determined by the type of its list
marker. If the list item is ordered, then it is also assigned a
start number, based on the ordered list marker.
-An indented code block will have to be indented four spaces beyond
-the edge of the region where text will be included in the list item.
+An indented code block will have to be preceded by four spaces of indentation
+beyond the edge of the region where text will be included in the list item.
In the following case that is 6 spaces:
```````````````````````````````` example
@@ -4112,8 +4468,8 @@ And in this case it is 11 spaces:
If the *first* block in the list item is an indented code block,
-then by rule #2, the contents must be indented *one* space after the
-list marker:
+then by rule #2, the contents must be preceded by *one* space of indentation
+after the list marker:
```````````````````````````````` example
indented code
@@ -4149,7 +4505,7 @@ paragraph
````````````````````````````````
-Note that an additional space indent is interpreted as space
+Note that an additional space of indentation is interpreted as space
inside the code block:
```````````````````````````````` example
@@ -4173,10 +4529,10 @@ inside the code block:
Note that rules #1 and #2 only apply to two cases: (a) cases
in which the lines to be included in a list item begin with a
-[non-whitespace character], and (b) cases in which
+character other than a space or tab, and (b) cases in which
they begin with an indented code
block. In a case like the following, where the first block begins with
-a three-space indent, the rules do not allow us to form a list item by
+three spaces of indentation, the rules do not allow us to form a list item by
indenting the whole thing and prepending a list marker:
```````````````````````````````` example
@@ -4201,8 +4557,8 @@ bar
````````````````````````````````
-This is not a significant restriction, because when a block begins
-with 1-3 spaces indent, the indentation can always be removed without
+This is not a significant restriction, because when a block is preceded by up to
+three spaces of indentation, the indentation can always be removed without
a change in interpretation, allowing rule #1 to be applied. So, in
the above case:
@@ -4222,11 +4578,10 @@ the above case:
3. **Item starting with a blank line.** If a sequence of lines *Ls*
starting with a single [blank line] constitute a (possibly empty)
- sequence of blocks *Bs*, not separated from each other by more than
- one blank line, and *M* is a list marker of width *W*,
+ sequence of blocks *Bs*, and *M* is a list marker of width *W*,
then the result of prepending *M* to the first line of *Ls*, and
- indenting subsequent lines of *Ls* by *W + 1* spaces, is a list
- item with *Bs* as its contents.
+ preceding subsequent lines of *Ls* by *W + 1* spaces of indentation, is a
+ list item with *Bs* as its contents.
If a line is empty, then it need not be indented. The type of the
list item (bullet or ordered) is determined by the type of its list
marker. If the list item is ordered, then it is also assigned a
@@ -4301,7 +4656,7 @@ Here is an empty bullet list item:
````````````````````````````````
-It does not matter whether there are spaces following the [list marker]:
+It does not matter whether there are spaces or tabs following the [list marker]:
```````````````````````````````` example
- foo
@@ -4358,9 +4713,9 @@ foo
4. **Indentation.** If a sequence of lines *Ls* constitutes a list item
- according to rule #1, #2, or #3, then the result of indenting each line
- of *Ls* by 1-3 spaces (the same for each line) also constitutes a
- list item with the same contents and attributes. If a line is
+ according to rule #1, #2, or #3, then the result of preceding each line
+ of *Ls* by up to three spaces of indentation (the same for each line) also
+ constitutes a list item with the same contents and attributes. If a line is
empty, then it need not be indented.
Indented one space:
@@ -4459,7 +4814,7 @@ Four spaces indent gives a code block:
5. **Laziness.** If a string of lines *Ls* constitute a [list
item](#list-items) with contents *Bs*, then the result of deleting
some or all of the indentation from one or more lines in which the
- next [non-whitespace character] after the indentation is
+ next character other than a space or tab after the indentation is
[paragraph continuation text] is a
list item with the same contents and attributes. The unindented
lines are called
@@ -4544,7 +4899,7 @@ continued here.
The rules for sublists follow from the general rules
[above][List items]. A sublist must be indented the same number
-of spaces a paragraph would need to be in order to be included
+of spaces of indentation a paragraph would need to be in order to be included
in the list item.
So, in this case we need two spaces indent:
@@ -4777,8 +5132,8 @@ The choice of four spaces is arbitrary. It can be learned, but it is
not likely to be guessed, and it trips up beginners regularly.
Would it help to adopt a two-space rule? The problem is that such
-a rule, together with the rule allowing 1--3 spaces indentation of the
-initial list marker, allows text that is indented *less than* the
+a rule, together with the rule allowing up to three spaces of indentation for
+the initial list marker, allows text that is indented *less than* the
original list marker to be included in the list item. For example,
`Markdown.pl` parses
@@ -4994,11 +5349,11 @@ by itself should be a paragraph followed by a nested sublist.
Since it is well established Markdown practice to allow lists to
interrupt paragraphs inside list items, the [principle of
uniformity] requires us to allow this outside list items as
-well. ([reStructuredText](http://docutils.sourceforge.net/rst.html)
+well. ([reStructuredText](https://docutils.sourceforge.net/rst.html)
takes a different approach, requiring blank lines before lists
even inside other list items.)
-In order to solve of unwanted lists in paragraphs with
+In order to solve the problem of unwanted lists in paragraphs with
hard-wrapped numerals, we allow only lists starting with `1` to
interrupt paragraphs. Thus,
@@ -5170,8 +5525,8 @@ item:
````````````````````````````````
-Note, however, that list items may not be indented more than
-three spaces. Here `- e` is treated as a paragraph continuation
+Note, however, that list items may not be preceded by more than
+three spaces of indentation. Here `- e` is treated as a paragraph continuation
line, because it is indented more than three spaces:
```````````````````````````````` example
@@ -5257,7 +5612,7 @@ So is this, with a empty second item:
````````````````````````````````
-These are loose lists, even though there is no space between the items,
+These are loose lists, even though there are no blank lines between the items,
because one of the items directly contains two block-level elements
with a blank line between them:
@@ -5278,572 +5633,233 @@ with a blank line between them:
d
-
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-- a
-- b
-
- [ref]: /url
-- d
-.
-
-
-
a
-
-
-
b
-
-
-
d
-
-
-````````````````````````````````
-
-
-This is a tight list, because the blank lines are in a code block:
-
-```````````````````````````````` example
-- a
-- ```
- b
-
-
- ```
-- c
-.
-
-
a
-
-
b
-
-
-
-
-
c
-
-````````````````````````````````
-
-
-This is a tight list, because the blank line is between two
-paragraphs of a sublist. So the sublist is loose while
-the outer list is tight:
-
-```````````````````````````````` example
-- a
- - b
-
- c
-- d
-.
-
-
a
-
-
-
b
-
c
-
-
-
-
d
-
-````````````````````````````````
-
-
-This is a tight list, because the blank line is inside the
-block quote:
-
-```````````````````````````````` example
-* a
- > b
- >
-* c
-.
-
-
a
-
-
b
-
-
-
c
-
-````````````````````````````````
-
-
-This list is tight, because the consecutive block elements
-are not separated by blank lines:
-
-```````````````````````````````` example
-- a
- > b
- ```
- c
- ```
-- d
-.
-
-
a
-
-
b
-
-
c
-
-
-
d
-
-````````````````````````````````
-
-
-A single-paragraph list is tight:
-
-```````````````````````````````` example
-- a
-.
-
-
a
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-- a
- - b
-.
-
-
a
-
-
b
-
-
-
-````````````````````````````````
-
-
-This list is loose, because of the blank line between the
-two block elements in the list item:
-
-```````````````````````````````` example
-1. ```
- foo
- ```
-
- bar
-.
-
-
-
foo
-
-
bar
-
-
-````````````````````````````````
-
-
-Here the outer list is loose, the inner list tight:
-
-```````````````````````````````` example
-* foo
- * bar
-
- baz
-.
-
-
-
foo
-
-
bar
-
-
baz
-
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-- a
- - b
- - c
-
-- d
- - e
- - f
-.
-
-
-
a
-
-
b
-
c
-
-
-
-
d
-
-
e
-
f
-
-
-
-````````````````````````````````
-
-
-# Inlines
-
-Inlines are parsed sequentially from the beginning of the character
-stream to the end (left to right, in left-to-right languages).
-Thus, for example, in
-
-```````````````````````````````` example
-`hi`lo`
-.
-
hilo`
-````````````````````````````````
-
-`hi` is parsed as code, leaving the backtick at the end as a literal
-backtick.
-
-
-## Backslash escapes
-
-Any ASCII punctuation character may be backslash-escaped:
-
-```````````````````````````````` example
-\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
-.
-
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
-````````````````````````````````
-
-
-Backslashes before other characters are treated as literal
-backslashes:
-
-```````````````````````````````` example
-\→\A\a\ \3\φ\«
-.
-
\→\A\a\ \3\φ\«
-````````````````````````````````
-
-
-Escaped characters are treated as regular characters and do
-not have their usual Markdown meanings:
-
-```````````````````````````````` example
-\*not emphasized*
-\ not a tag
-\[not a link](/foo)
-\`not code`
-1\. not a list
-\* not a list
-\# not a heading
-\[foo]: /url "not a reference"
-\ö not a character entity
-.
-
*not emphasized*
-<br/> not a tag
-[not a link](/foo)
-`not code`
-1. not a list
-* not a list
-# not a heading
-[foo]: /url "not a reference"
-ö not a character entity
-````````````````````````````````
-
-
-If a backslash is itself escaped, the following character is not:
-
-```````````````````````````````` example
-\\*emphasis*
-.
-
\emphasis
-````````````````````````````````
-
-
-A backslash at the end of the line is a [hard line break]:
-
-```````````````````````````````` example
-foo\
-bar
-.
-
foo
-bar
-````````````````````````````````
-
-
-Backslash escapes do not work in code blocks, code spans, autolinks, or
-raw HTML:
-
-```````````````````````````````` example
-`` \[\` ``
-.
-
\[\`
-````````````````````````````````
-
-
-```````````````````````````````` example
- \[\]
-.
-
\[\]
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-~~~
-\[\]
-~~~
-.
-
\[\]
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-
-.
-
-````````````````````````````````
-
-
-[Decimal numeric character
-references](@)
-consist of `` + a string of 1--7 arabic digits + `;`. A
-numeric character reference is parsed as the corresponding
-Unicode character. Invalid Unicode code points will be replaced by
-the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons,
-the code point `U+0000` will also be replaced by `U+FFFD`.
-
-```````````````````````````````` example
-# Ӓ Ϡ
-.
-
# Ӓ Ϡ �
-````````````````````````````````
-
-
-[Hexadecimal numeric character
-references](@) consist of `` +
-either `X` or `x` + a string of 1-6 hexadecimal digits + `;`.
-They too are parsed as the corresponding Unicode character (this
-time specified with a hexadecimal numeral instead of decimal).
-
-```````````````````````````````` example
-" ആ ಫ
-.
-
" ആ ಫ
+
+
````````````````````````````````
-Here are some nonentities:
-
```````````````````````````````` example
-  &x;
-
-abcdef0;
-&ThisIsNotDefined; &hi?;
+- a
+- b
+
+ [ref]: /url
+- d
.
-
````````````````````````````````
-Strings that are not on the list of HTML5 named entities are not
-recognized as entity references either:
+This is a tight list, because the blank line is between two
+paragraphs of a sublist. So the sublist is loose while
+the outer list is tight:
```````````````````````````````` example
-&MadeUpEntity;
+- a
+ - b
+
+ c
+- d
.
-
&MadeUpEntity;
+
+
a
+
+
+
b
+
c
+
+
+
+
d
+
````````````````````````````````
-Entity and numeric character references are recognized in any
-context besides code spans or code blocks, including
-URLs, [link titles], and [fenced code block][] [info strings]:
+This is a tight list, because the blank line is inside the
+block quote:
```````````````````````````````` example
-
+* a
+ > b
+ >
+* c
.
-
+
````````````````````````````````
-```````````````````````````````` example
-[foo]
+A single-paragraph list is tight:
-[foo]: /föö "föö"
+```````````````````````````````` example
+- a
.
-
````````````````````````````````
```````````````````````````````` example
-``` föö
-foo
-```
+- a
+ - b
.
-
foo
-
+
+
a
+
+
b
+
+
+
````````````````````````````````
-Entity and numeric character references are treated as literal
-text in code spans and code blocks:
+This list is loose, because of the blank line between the
+two block elements in the list item:
```````````````````````````````` example
-`föö`
-.
-
föö
-````````````````````````````````
-
+1. ```
+ foo
+ ```
-```````````````````````````````` example
- föfö
+ bar
.
-
föfö
+
+
+
foo
+
bar
+
+
````````````````````````````````
-Entity and numeric character references cannot be used
-in place of symbols indicating structure in CommonMark
-documents.
+Here the outer list is loose, the inner list tight:
```````````````````````````````` example
-*foo*
-*foo*
+* foo
+ * bar
+
+ baz
.
-
*foo*
-foo
+
+
+
foo
+
+
bar
+
+
baz
+
+
````````````````````````````````
+
```````````````````````````````` example
-* foo
+- a
+ - b
+ - c
-* foo
+- d
+ - e
+ - f
.
-
* foo
-
foo
+
+
a
+
+
b
+
c
+
+
+
+
d
+
+
e
+
f
+
+
````````````````````````````````
-```````````````````````````````` example
-foo
bar
-.
-
foo
-bar
-````````````````````````````````
+# Inlines
+
+Inlines are parsed sequentially from the beginning of the character
+stream to the end (left to right, in left-to-right languages).
+Thus, for example, in
```````````````````````````````` example
- foo
+`hi`lo`
.
-
→foo
+
hilo`
````````````````````````````````
+`hi` is parsed as code, leaving the backtick at the end as a literal
+backtick.
-```````````````````````````````` example
-[a](url "tit")
-.
-
[a](url "tit")
-````````````````````````````````
## Code spans
@@ -5854,7 +5870,7 @@ preceded nor followed by a backtick.
A [code span](@) begins with a backtick string and ends with
a backtick string of equal length. The contents of the code span are
-the characters between the two backtick strings, normalized in the
+the characters between these two backtick strings, normalized in the
following ways:
- First, [line endings] are converted to [spaces].
@@ -6038,18 +6054,18 @@ But this is an HTML tag:
And this is code:
```````````````````````````````` example
-``
+``
.
-
<http://foo.bar.baz>`
+
<https://foo.bar.baz>`
````````````````````````````````
But this is an autolink:
```````````````````````````````` example
-`
+`
.
-
````````````````````````````````
@@ -6082,7 +6098,7 @@ closing backtick strings to be equal in length:
## Emphasis and strong emphasis
John Gruber's original [Markdown syntax
-description](http://daringfireball.net/projects/markdown/syntax#em) says:
+description](https://daringfireball.net/projects/markdown/syntax#em) says:
> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML
@@ -6133,17 +6149,17 @@ a non-backslash-escaped `_` character.
A [left-flanking delimiter run](@) is
a [delimiter run] that is (1) not followed by [Unicode whitespace],
-and either (2a) not followed by a [punctuation character], or
-(2b) followed by a [punctuation character] and
-preceded by [Unicode whitespace] or a [punctuation character].
+and either (2a) not followed by a [Unicode punctuation character], or
+(2b) followed by a [Unicode punctuation character] and
+preceded by [Unicode whitespace] or a [Unicode punctuation character].
For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace.
A [right-flanking delimiter run](@) is
a [delimiter run] that is (1) not preceded by [Unicode whitespace],
-and either (2a) not preceded by a [punctuation character], or
-(2b) preceded by a [punctuation character] and
-followed by [Unicode whitespace] or a [punctuation character].
+and either (2a) not preceded by a [Unicode punctuation character], or
+(2b) preceded by a [Unicode punctuation character] and
+followed by [Unicode whitespace] or a [Unicode punctuation character].
For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace.
@@ -6184,7 +6200,7 @@ Here are some examples of delimiter runs.
(The idea of distinguishing left-flanking and right-flanking
delimiter runs based on the character before and the character
after comes from Roopesh Chander's
-[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags).
+[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags).
vfmd uses the terminology "emphasis indicator string" instead of "delimiter
run," and its rules for distinguishing left- and right-flanking runs
are a bit more complex than the ones given here.)
@@ -6198,7 +6214,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [left-flanking delimiter run]
and either (a) not part of a [right-flanking delimiter run]
or (b) part of a [right-flanking delimiter run]
- preceded by punctuation.
+ preceded by a [Unicode punctuation character].
3. A single `*` character [can close emphasis](@)
iff it is part of a [right-flanking delimiter run].
@@ -6207,7 +6223,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [right-flanking delimiter run]
and either (a) not part of a [left-flanking delimiter run]
or (b) part of a [left-flanking delimiter run]
- followed by punctuation.
+ followed by a [Unicode punctuation character].
5. A double `**` [can open strong emphasis](@)
iff it is part of a [left-flanking delimiter run].
@@ -6216,7 +6232,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [left-flanking delimiter run]
and either (a) not part of a [right-flanking delimiter run]
or (b) part of a [right-flanking delimiter run]
- preceded by punctuation.
+ preceded by a [Unicode punctuation character].
7. A double `**` [can close strong emphasis](@)
iff it is part of a [right-flanking delimiter run].
@@ -6225,7 +6241,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [right-flanking delimiter run]
and either (a) not part of a [left-flanking delimiter run]
or (b) part of a [left-flanking delimiter run]
- followed by punctuation.
+ followed by a [Unicode punctuation character].
9. Emphasis begins with a delimiter that [can open emphasis] and ends
with a delimiter that [can close emphasis], and that uses the same
@@ -6326,6 +6342,21 @@ Unicode nonbreaking spaces count as whitespace, too:
````````````````````````````````
+Unicode symbols count as punctuation, too:
+
+```````````````````````````````` example
+*$*alpha.
+
+*£*bravo.
+
+*€*charlie.
+.
+
*$*alpha.
+
*£*bravo.
+
*€*charlie.
+````````````````````````````````
+
+
Intraword emphasis with `*` is permitted:
```````````````````````````````` example
@@ -6437,7 +6468,7 @@ whitespace:
````````````````````````````````
-A newline also counts as whitespace:
+A line ending also counts as whitespace:
```````````````````````````````` example
*foo bar
@@ -6602,7 +6633,7 @@ __ foo bar__
````````````````````````````````
-A newline counts as whitespace:
+A line ending counts as whitespace:
```````````````````````````````` example
__
foo bar__
@@ -6881,7 +6912,7 @@ emphasis sections in this example:
The same condition ensures that the following
cases are all strong emphasis nested inside
-emphasis, even when the interior spaces are
+emphasis, even when the interior whitespace is
omitted:
@@ -7411,16 +7442,16 @@ _a `_`_
```````````````````````````````` example
-**a
+**a
.
-
````````````````````````````````
@@ -7458,13 +7489,14 @@ following rules apply:
A [link destination](@) consists of either
- a sequence of zero or more characters between an opening `<` and a
- closing `>` that contains no line breaks or unescaped
+ closing `>` that contains no line endings or unescaped
`<` or `>` characters, or
-- a nonempty sequence of characters that does not start with
- `<`, does not include ASCII space or control characters, and
- includes parentheses only if (a) they are backslash-escaped or
- (b) they are part of a balanced pair of unescaped parentheses.
+- a nonempty sequence of characters that does not start with `<`,
+ does not include [ASCII control characters][ASCII control character]
+ or [space] characters, and includes parentheses only if (a) they are
+ backslash-escaped or (b) they are part of a balanced pair of
+ unescaped parentheses.
(Implementations may impose limits on parentheses nesting to
avoid performance issues, but at least three levels of nesting
should be supported.)
@@ -7487,10 +7519,14 @@ Although [link titles] may span multiple lines, they may not contain
a [blank line].
An [inline link](@) consists of a [link text] followed immediately
-by a left parenthesis `(`, optional [whitespace], an optional
-[link destination], an optional [link title] separated from the link
-destination by [whitespace], optional [whitespace], and a right
-parenthesis `)`. The link's text consists of the inlines contained
+by a left parenthesis `(`, an optional [link destination], an optional
+[link title], and a right parenthesis `)`.
+These four components may be separated by spaces, tabs, and up to one line
+ending.
+If both [link destination] and [link title] are present, they *must* be
+separated by spaces, tabs, and up to one line ending.
+
+The link's text consists of the inlines contained
in the [link text] (excluding the enclosing square brackets).
The link's URI consists of the link destination, excluding enclosing
`<...>` if present, with backslash-escapes in effect as described
@@ -7507,7 +7543,8 @@ Here is a simple inline link:
````````````````````````````````
-The title may be omitted:
+The title, the link text and even
+the destination may be omitted:
```````````````````````````````` example
[link](/uri)
@@ -7515,8 +7552,12 @@ The title may be omitted:
````````````````````````````````
+```````````````````````````````` example
+[](./target.md)
+.
+
+````````````````````````````````
-Both the title and the destination may be omitted:
```````````````````````````````` example
[link]()
@@ -7531,6 +7572,13 @@ Both the title and the destination may be omitted:
````````````````````````````````
+
+```````````````````````````````` example
+[]()
+.
+
+````````````````````````````````
+
The destination can only contain spaces if it is
enclosed in pointy brackets:
@@ -7546,7 +7594,7 @@ enclosed in pointy brackets:
````````````````````````````````
-The destination cannot contain line breaks,
+The destination cannot contain line endings,
even if enclosed in pointy brackets:
```````````````````````````````` example
@@ -7615,6 +7663,13 @@ balanced:
However, if you have unbalanced parentheses, you need to escape or use the
`<...>` form:
+```````````````````````````````` example
+[link](foo(and(bar))
+.
+
[link](foo(and(bar))
+````````````````````````````````
+
+
```````````````````````````````` example
[link](foo\(and\(bar\))
.
@@ -7644,13 +7699,13 @@ A link can contain fragment identifiers and queries:
```````````````````````````````` example
[link](#fragment)
-[link](http://example.com#fragment)
+[link](https://example.com#fragment)
-[link](http://example.com?foo=3#frag)
+[link](https://example.com?foo=3#frag)
.
````````````````````````````````
@@ -7714,7 +7769,8 @@ may be used in titles:
````````````````````````````````
-Titles must be separated from the link using a [whitespace].
+Titles must be separated from the link using spaces, tabs, and up to one line
+ending.
Other [Unicode whitespace] like non-breaking space doesn't work.
```````````````````````````````` example
@@ -7757,7 +7813,8 @@ titles with no closing quotation mark, though 1.0.2b8 does not.
It seems preferable to adopt a simple, rational rule that works
the same way in inline links and link reference definitions.)
-[Whitespace] is allowed around the destination and title:
+Spaces, tabs, and up to one line ending are allowed around the destination and
+title:
```````````````````````````````` example
[link]( /uri
@@ -7892,9 +7949,9 @@ and autolinks over link grouping:
```````````````````````````````` example
-[foo
+[foo
.
-
````````````````````````````````
@@ -7908,7 +7965,8 @@ that [matches] a [link reference definition] elsewhere in the document.
A [link label](@) begins with a left bracket (`[`) and ends
with the first right bracket (`]`) that is not backslash-escaped.
-Between these brackets there must be at least one [non-whitespace character].
+Between these brackets there must be at least one character that is not a space,
+tab, or line ending.
Unescaped square bracket characters are not allowed inside the
opening and closing square brackets of [link labels]. A link
label can have at most 999 characters inside the square
@@ -7918,14 +7976,13 @@ One label [matches](@)
another just in case their normalized forms are equal. To normalize a
label, strip off the opening and closing brackets,
perform the *Unicode case fold*, strip leading and trailing
-[whitespace] and collapse consecutive internal
-[whitespace] to a single space. If there are multiple
+spaces, tabs, and line endings, and collapse consecutive internal
+spaces, tabs, and line endings to a single space. If there are multiple
matching reference link definitions, the one that comes first in the
document is used. (It is desirable in such cases to emit a warning.)
-The contents of the first link label are parsed as inlines, which are
-used as the link's text. The link's URI and title are provided by the
-matching [link reference definition].
+The link's URI and title are provided by the matching [link
+reference definition].
Here is a simple example:
@@ -8018,11 +8075,11 @@ emphasis grouping:
```````````````````````````````` example
-[foo *bar][ref]
+[foo *bar][ref]*
[ref]: /uri
.
-
````````````````````````````````
@@ -8048,11 +8105,11 @@ and autolinks over link grouping:
```````````````````````````````` example
-[foo
+[foo
[ref]: /uri
.
-
````````````````````````````````
@@ -8070,15 +8127,15 @@ Matching is case-insensitive:
Unicode case fold is used:
```````````````````````````````` example
-[Толпой][Толпой] is a Russian word.
+[ẞ]
-[ТОЛПОЙ]: /url
+[SS]: /url
.
-
````````````````````````````````
-Consecutive internal [whitespace] is treated as one space for
+Consecutive internal spaces, tabs, and line endings are treated as one space for
purposes of determining matching:
```````````````````````````````` example
@@ -8091,7 +8148,7 @@ purposes of determining matching:
````````````````````````````````
-No [whitespace] is allowed between the [link text] and the
+No spaces, tabs, or line endings are allowed between the [link text] and the
[link label]:
```````````````````````````````` example
@@ -8221,7 +8278,8 @@ Note that in this example `]` is not backslash-escaped:
````````````````````````````````
-A [link label] must contain at least one [non-whitespace character]:
+A [link label] must contain at least one character that is not a space, tab, or
+line ending:
```````````````````````````````` example
[]
@@ -8251,7 +8309,7 @@ A [collapsed reference link](@)
consists of a [link label] that [matches] a
[link reference definition] elsewhere in the
document, followed by the string `[]`.
-The contents of the first link label are parsed as inlines,
+The contents of the link label are parsed as inlines,
which are used as the link's text. The link's URI and title are
provided by the matching reference link definition. Thus,
`[foo][]` is equivalent to `[foo][foo]`.
@@ -8286,7 +8344,7 @@ The link labels are case-insensitive:
-As with full reference links, [whitespace] is not
+As with full reference links, spaces, tabs, or line endings are not
allowed between the two sets of brackets:
```````````````````````````````` example
@@ -8304,7 +8362,7 @@ A [shortcut reference link](@)
consists of a [link label] that [matches] a
[link reference definition] elsewhere in the
document and is not followed by `[]` or a link label.
-The contents of the first link label are parsed as inlines,
+The contents of the link label are parsed as inlines,
which are used as the link's text. The link's URI and title
are provided by the matching link reference definition.
Thus, `[foo]` is equivalent to `[foo][]`.
@@ -8391,7 +8449,7 @@ following closing bracket:
````````````````````````````````
-Full and compact references take precedence over shortcut
+Full and collapsed references take precedence over shortcut
references:
```````````````````````````````` example
@@ -8614,7 +8672,7 @@ The labels are case-insensitive:
````````````````````````````````
-As with reference links, [whitespace] is not allowed
+As with reference links, spaces, tabs, and line endings are not allowed
between the two sets of brackets:
```````````````````````````````` example
@@ -8707,9 +8765,9 @@ a link to the URI, with the URI as the link's label.
An [absolute URI](@),
for these purposes, consists of a [scheme] followed by a colon (`:`)
-followed by zero or more characters other than ASCII
-[whitespace] and control characters, `<`, and `>`. If
-the URI includes these characters, they must be percent-encoded
+followed by zero or more characters other than [ASCII control
+characters][ASCII control character], [space], `<`, and `>`.
+If the URI includes these characters, they must be percent-encoded
(e.g. `%20` for a space).
For purposes of this spec, a [scheme](@) is any sequence
@@ -8727,9 +8785,9 @@ Here are some valid autolinks:
```````````````````````````````` example
-
+
.
-
````````````````````````````````
@@ -8785,18 +8843,18 @@ with their syntax:
Spaces are not allowed in autolinks:
```````````````````````````````` example
-
+
.
-
<http://foo.bar/baz bim>
+
<https://foo.bar/baz bim>
````````````````````````````````
Backslash-escapes do not work inside autolinks:
```````````````````````````````` example
-
+
.
-
````````````````````````````````
@@ -8848,9 +8906,9 @@ These are not autolinks:
```````````````````````````````` example
-< http://foo.bar >
+< https://foo.bar >
.
-
< http://foo.bar >
+
< https://foo.bar >
````````````````````````````````
@@ -8869,9 +8927,9 @@ These are not autolinks:
```````````````````````````````` example
-http://example.com
+https://example.com
.
-
http://example.com
+
https://example.com
````````````````````````````````
@@ -8895,7 +8953,7 @@ A [tag name](@) consists of an ASCII letter
followed by zero or more ASCII letters, digits, or
hyphens (`-`).
-An [attribute](@) consists of [whitespace],
+An [attribute](@) consists of spaces, tabs, and up to one line ending,
an [attribute name], and an optional
[attribute value specification].
@@ -8905,9 +8963,9 @@ letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML
specification restricted to ASCII. HTML5 is laxer.)
An [attribute value specification](@)
-consists of optional [whitespace],
-a `=` character, optional [whitespace], and an [attribute
-value].
+consists of optional spaces, tabs, and up to one line ending,
+a `=` character, optional spaces, tabs, and up to one line ending,
+and an [attribute value].
An [attribute value](@)
consists of an [unquoted attribute value],
@@ -8915,7 +8973,7 @@ a [single-quoted attribute value], or a [double-quoted attribute value].
An [unquoted attribute value](@)
is a nonempty string of characters not
-including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``.
+including spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or `` ` ``.
A [single-quoted attribute value](@)
consists of `'`, zero or more
@@ -8926,26 +8984,24 @@ consists of `"`, zero or more
characters not including `"`, and a final `"`.
An [open tag](@) consists of a `<` character, a [tag name],
-zero or more [attributes], optional [whitespace], an optional `/`
-character, and a `>` character.
+zero or more [attributes], optional spaces, tabs, and up to one line ending,
+an optional `/` character, and a `>` character.
A [closing tag](@) consists of the string `</`, a
-[tag name], optional [whitespace], and the character `>`.
+[tag name], optional spaces, tabs, and up to one line ending, and the character
+`>`.
-An [HTML comment](@) consists of ``,
-where *text* does not start with `>` or `->`, does not end with `-`,
-and does not contain `--`. (See the
-[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).)
+An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of
+characters not including the string `-->`, and `-->` (see the
+[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)).
A [processing instruction](@)
consists of the string `<?`, a string
of characters not including the string `?>`, and the string
`?>`.
-A [declaration](@) consists of the
-string ``, and the character `>`.
+A [declaration](@) consists of the string `<!`, an ASCII letter, zero or more
+characters not including the character `>`, and the character `>`.
A [CDATA section](@) consists of
the string `<![CDATA[`, a string of characters not including the string
@@ -9046,7 +9102,7 @@ bim!bop />
````````````````````````````````
-Missing [whitespace]:
+Missing whitespace:
```````````````````````````````` example
@@ -9076,30 +9132,20 @@ Illegal attributes in closing tag:
Comments:
```````````````````````````````` example
-foo
+foo
.
-
+ * Note that child nodes are themselves blocks, e.g. {@link Paragraph}, {@link ListBlock} etc.
+ *
+ * @see CommonMark Spec
+ */
public class BlockQuote extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/BulletList.java b/commonmark/src/main/java/org/commonmark/node/BulletList.java
index 127862312..014f4d3b2 100644
--- a/commonmark/src/main/java/org/commonmark/node/BulletList.java
+++ b/commonmark/src/main/java/org/commonmark/node/BulletList.java
@@ -1,20 +1,50 @@
package org.commonmark.node;
+/**
+ * A bullet list, e.g.:
+ *
+ * - One
+ * - Two
+ * - Three
+ *
+ *
+ * The children are {@link ListItem} blocks, which contain other blocks (or nested lists).
+ *
+ * @see CommonMark Spec: List items
+ */
public class BulletList extends ListBlock {
- private char bulletMarker;
+ private String marker;
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+ /**
+ * @return the bullet list marker that was used, e.g. {@code -}, {@code *} or {@code +}, if available, or null otherwise
+ */
+ public String getMarker() {
+ return marker;
+ }
+
+ public void setMarker(String marker) {
+ this.marker = marker;
+ }
+
+ /**
+ * @deprecated use {@link #getMarker()} instead
+ */
+ @Deprecated
public char getBulletMarker() {
- return bulletMarker;
+ return marker != null && !marker.isEmpty() ? marker.charAt(0) : '\0';
}
+ /**
+ * @deprecated use {@link #getMarker()} instead
+ */
+ @Deprecated
public void setBulletMarker(char bulletMarker) {
- this.bulletMarker = bulletMarker;
+ this.marker = bulletMarker != '\0' ? String.valueOf(bulletMarker) : null;
}
-
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Code.java b/commonmark/src/main/java/org/commonmark/node/Code.java
index 0b47ecb71..3b79e0c9c 100644
--- a/commonmark/src/main/java/org/commonmark/node/Code.java
+++ b/commonmark/src/main/java/org/commonmark/node/Code.java
@@ -1,5 +1,13 @@
package org.commonmark.node;
+/**
+ * Inline code span, e.g.:
+ *
+ * Some `inline code`
+ *
+ *
+ * @see CommonMark Spec
+ */
public class Code extends Node {
private String literal;
@@ -16,6 +24,10 @@ public void accept(Visitor visitor) {
visitor.visit(this);
}
+ /**
+ * @return the literal text in the code span (note that it's not necessarily the raw text between backticks,
+ * e.g. when spaces are stripped)
+ */
public String getLiteral() {
return literal;
}
diff --git a/commonmark/src/main/java/org/commonmark/node/CustomBlock.java b/commonmark/src/main/java/org/commonmark/node/CustomBlock.java
index 6596ec1a0..cad88933a 100644
--- a/commonmark/src/main/java/org/commonmark/node/CustomBlock.java
+++ b/commonmark/src/main/java/org/commonmark/node/CustomBlock.java
@@ -1,5 +1,8 @@
package org.commonmark.node;
+/**
+ * A block that extensions can subclass to define custom blocks (not part of the core specification).
+ */
public abstract class CustomBlock extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/CustomNode.java b/commonmark/src/main/java/org/commonmark/node/CustomNode.java
index a68e5cc11..88f0254da 100644
--- a/commonmark/src/main/java/org/commonmark/node/CustomNode.java
+++ b/commonmark/src/main/java/org/commonmark/node/CustomNode.java
@@ -1,5 +1,8 @@
package org.commonmark.node;
+/**
+ * A node that extensions can subclass to define custom nodes (not part of the core specification).
+ */
public abstract class CustomNode extends Node {
@Override
public void accept(Visitor visitor) {
diff --git a/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java
new file mode 100644
index 000000000..59cb88274
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java
@@ -0,0 +1,67 @@
+package org.commonmark.node;
+
+import org.commonmark.internal.util.Escaping;
+
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A map that can be used to store and look up reference definitions by a label. The labels are case-insensitive and
+ * normalized, the same way as for {@link LinkReferenceDefinition} nodes.
+ *
+ * @param <D> the type of value
+ */
+public class DefinitionMap<D> {
+
+    private final Class<D> type;
+    // LinkedHashMap for determinism and to preserve document order
+    private final Map<String, D> definitions = new LinkedHashMap<>();
+
+    /**
+     * @param type the class of the definitions stored in this map, returned as-is by {@link #getType()}
+     */
+    public DefinitionMap(Class<D> type) {
+        this.type = type;
+    }
+
+    /**
+     * @return the class that was passed to the constructor
+     */
+    public Class<D> getType() {
+        return type;
+    }
+
+    /**
+     * Add all definitions of {@code that} map to this one. A label that already has a definition in this map keeps
+     * it; the entry from {@code that} is ignored for that label.
+     */
+    public void addAll(DefinitionMap<D> that) {
+        for (var entry : that.definitions.entrySet()) {
+            // Note that keys are already normalized, so we can add them directly
+            definitions.putIfAbsent(entry.getKey(), entry.getValue());
+        }
+    }
+
+    /**
+     * Store a new definition unless one is already in the map. If there is no definition for that label yet, return null.
+     * Otherwise, return the existing definition.
+     * <p>
+     * The label is normalized by the definition map before storing.
+     */
+    public D putIfAbsent(String label, D definition) {
+        String normalizedLabel = Escaping.normalizeLabelContent(label);
+
+        // spec: When there are multiple matching link reference definitions, the first is used
+        return definitions.putIfAbsent(normalizedLabel, definition);
+    }
+
+    /**
+     * Look up a definition by label. The label is normalized by the definition map before lookup.
+     *
+     * @return the value or null
+     */
+    public D get(String label) {
+        String normalizedLabel = Escaping.normalizeLabelContent(label);
+        return definitions.get(normalizedLabel);
+    }
+
+    /**
+     * @return the normalized labels that currently have a definition, in insertion order
+     */
+    public Set<String> keySet() {
+        return definitions.keySet();
+    }
+
+    /**
+     * @return the stored definitions, in insertion order
+     */
+    public Collection<D> values() {
+        return definitions.values();
+    }
+}
diff --git a/commonmark/src/main/java/org/commonmark/node/Document.java b/commonmark/src/main/java/org/commonmark/node/Document.java
index 5b7e74189..b4968c206 100644
--- a/commonmark/src/main/java/org/commonmark/node/Document.java
+++ b/commonmark/src/main/java/org/commonmark/node/Document.java
@@ -1,5 +1,8 @@
package org.commonmark.node;
+/**
+ * The root block of a document, containing the top-level blocks.
+ */
public class Document extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/Emphasis.java b/commonmark/src/main/java/org/commonmark/node/Emphasis.java
index 9877e7b63..5efc8c327 100644
--- a/commonmark/src/main/java/org/commonmark/node/Emphasis.java
+++ b/commonmark/src/main/java/org/commonmark/node/Emphasis.java
@@ -1,5 +1,13 @@
package org.commonmark.node;
+/**
+ * Emphasis, e.g.:
+ *
+ *
+ * @see CommonMark Spec
+ */
public class IndentedCodeBlock extends Block {
private String literal;
diff --git a/commonmark/src/main/java/org/commonmark/node/Link.java b/commonmark/src/main/java/org/commonmark/node/Link.java
index b2ed8c2a1..4edc7f676 100644
--- a/commonmark/src/main/java/org/commonmark/node/Link.java
+++ b/commonmark/src/main/java/org/commonmark/node/Link.java
@@ -18,7 +18,7 @@
* Note that the text in the link can contain inline formatting, so it could also contain an {@link Image} or
* {@link Emphasis}, etc.
*
- * @see CommonMark Spec for links
+ * @see CommonMark Spec
*/
public class Link extends Node {
@@ -46,6 +46,9 @@ public void setDestination(String destination) {
this.destination = destination;
}
+ /**
+ * @return the title or null
+ */
public String getTitle() {
return title;
}
diff --git a/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java b/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java
index 3f8bfd0f0..b866781f0 100644
--- a/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java
+++ b/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java
@@ -9,9 +9,9 @@
* They can be referenced anywhere else in the document to produce a link using [foo]. The definitions
* themselves are usually not rendered in the final output.
*
- * @see Link reference definitions
+ * @see CommonMark Spec
*/
-public class LinkReferenceDefinition extends Node {
+public class LinkReferenceDefinition extends Block {
private String label;
private String destination;
diff --git a/commonmark/src/main/java/org/commonmark/node/ListBlock.java b/commonmark/src/main/java/org/commonmark/node/ListBlock.java
index 69482f66e..1290bc622 100644
--- a/commonmark/src/main/java/org/commonmark/node/ListBlock.java
+++ b/commonmark/src/main/java/org/commonmark/node/ListBlock.java
@@ -1,12 +1,15 @@
package org.commonmark.node;
+/**
+ * A list block like {@link BulletList} or {@link OrderedList}.
+ */
public abstract class ListBlock extends Block {
private boolean tight;
/**
* @return whether this list is tight or loose
- * @see CommonMark Spec for tight lists
+ * @see CommonMark Spec for tight lists
*/
public boolean isTight() {
return tight;
diff --git a/commonmark/src/main/java/org/commonmark/node/ListItem.java b/commonmark/src/main/java/org/commonmark/node/ListItem.java
index aa526be01..c4d1214e7 100644
--- a/commonmark/src/main/java/org/commonmark/node/ListItem.java
+++ b/commonmark/src/main/java/org/commonmark/node/ListItem.java
@@ -1,9 +1,78 @@
package org.commonmark.node;
+/**
+ * A child of a {@link ListBlock}, containing other blocks (e.g. {@link Paragraph}, other lists, etc).
+ *
+ * Note that a list item can't directly contain {@link Text}, it needs to be:
+ * {@link ListItem} : {@link Paragraph} : {@link Text}.
+ * If you want a list that is rendered tightly, create a list with {@link ListBlock#setTight(boolean)}.
+ *
+ * @see CommonMark Spec: List items
+ */
public class ListItem extends Block {
+ private Integer markerIndent;
+ private Integer contentIndent;
+
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+
+ /**
+ * Returns the indent of the marker such as "-" or "1." in columns (spaces or tab stop of 4) if available, or null
+ * otherwise.
+ *
+ * Some examples and their marker indent:
+ * <pre><code>- Foo</code></pre>
+ * Marker indent: 0
+ * <pre><code> - Foo</code></pre>
+ * Marker indent: 1
+ * <pre><code>  1. Foo</code></pre>
+ * Marker indent: 2
+ */
+ public Integer getMarkerIndent() {
+ return markerIndent;
+ }
+
+ public void setMarkerIndent(Integer markerIndent) {
+ this.markerIndent = markerIndent;
+ }
+
+ /**
+ * Returns the indent of the content in columns (spaces or tab stop of 4) if available, or null otherwise.
+ * The content indent is counted from the beginning of the line and includes the marker on the first line.
+ *
+ * Some examples and their content indent:
+ * <pre><code>- Foo</code></pre>
+ * Content indent: 2
+ * <pre><code> - Foo</code></pre>
+ * Content indent: 3
+ * <pre><code>  1. Foo</code></pre>
+ * Content indent: 5
+ *
+ * Note that subsequent lines in the same list item need to be indented by at least the content indent to be counted
+ * as part of the list item.
+ */
+ public Integer getContentIndent() {
+ return contentIndent;
+ }
+
+ public void setContentIndent(Integer contentIndent) {
+ this.contentIndent = contentIndent;
+ }
+
+ /**
+ * @deprecated list items should only contain block nodes; if you're trying to create a list that is rendered
+ * without paragraphs, use {@link ListBlock#setTight(boolean)} instead.
+ */
+ @Override
+ @Deprecated
+ public void appendChild(Node child) {
+ super.appendChild(child);
+ }
+
+ public void appendChild(Block child) {
+ super.appendChild(child);
+ }
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Node.java b/commonmark/src/main/java/org/commonmark/node/Node.java
index 5a2f036e4..d95a72c60 100644
--- a/commonmark/src/main/java/org/commonmark/node/Node.java
+++ b/commonmark/src/main/java/org/commonmark/node/Node.java
@@ -86,6 +86,9 @@ public void unlink() {
this.prev = null;
}
+ /**
+ * Inserts the {@code sibling} node after {@code this} node.
+ */
public void insertAfter(Node sibling) {
sibling.unlink();
sibling.next = this.next;
@@ -100,6 +103,9 @@ public void insertAfter(Node sibling) {
}
}
+ /**
+ * Inserts the {@code sibling} node before {@code this} node.
+ */
public void insertBefore(Node sibling) {
sibling.unlink();
sibling.prev = this.prev;
@@ -114,13 +120,12 @@ public void insertBefore(Node sibling) {
}
}
-
/**
* @return the source spans of this node if included by the parser, an empty list otherwise
* @since 0.16.0
*/
public List getSourceSpans() {
- return sourceSpans != null ? Collections.unmodifiableList(sourceSpans) : Collections.emptyList();
+ return sourceSpans != null ? Collections.unmodifiableList(sourceSpans) : List.of();
}
/**
diff --git a/commonmark/src/main/java/org/commonmark/node/OrderedList.java b/commonmark/src/main/java/org/commonmark/node/OrderedList.java
index 1f988234c..61f8902c0 100644
--- a/commonmark/src/main/java/org/commonmark/node/OrderedList.java
+++ b/commonmark/src/main/java/org/commonmark/node/OrderedList.java
@@ -1,29 +1,78 @@
package org.commonmark.node;
+/**
+ * An ordered list, e.g.:
+ *
+ * 1. One
+ * 2. Two
+ * 3. Three
+ *
+ *
+ * The children are {@link ListItem} blocks, which contain other blocks (or nested lists).
+ *
+ * @see CommonMark Spec: List items
+ */
public class OrderedList extends ListBlock {
- private int startNumber;
- private char delimiter;
+ private String markerDelimiter;
+ private Integer markerStartNumber;
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+ /**
+ * @return the start number used in the marker, e.g. {@code 1}, if available, or null otherwise
+ */
+ public Integer getMarkerStartNumber() {
+ return markerStartNumber;
+ }
+
+ public void setMarkerStartNumber(Integer markerStartNumber) {
+ this.markerStartNumber = markerStartNumber;
+ }
+
+ /**
+ * @return the delimiter used in the marker, e.g. {@code .} or {@code )}, if available, or null otherwise
+ */
+ public String getMarkerDelimiter() {
+ return markerDelimiter;
+ }
+
+ public void setMarkerDelimiter(String markerDelimiter) {
+ this.markerDelimiter = markerDelimiter;
+ }
+
+ /**
+ * @deprecated use {@link #getMarkerStartNumber()} instead
+ */
+ @Deprecated
public int getStartNumber() {
- return startNumber;
+ return markerStartNumber != null ? markerStartNumber : 0;
}
+ /**
+ * @deprecated use {@link #setMarkerStartNumber} instead
+ */
+ @Deprecated
public void setStartNumber(int startNumber) {
- this.startNumber = startNumber;
+ this.markerStartNumber = startNumber != 0 ? startNumber : null;
}
+ /**
+ * @deprecated use {@link #getMarkerDelimiter()} instead
+ */
+ @Deprecated
public char getDelimiter() {
- return delimiter;
+ return markerDelimiter != null && !markerDelimiter.isEmpty() ? markerDelimiter.charAt(0) : '\0';
}
+ /**
+ * @deprecated use {@link #setMarkerDelimiter} instead
+ */
+ @Deprecated
public void setDelimiter(char delimiter) {
- this.delimiter = delimiter;
+ this.markerDelimiter = delimiter != '\0' ? String.valueOf(delimiter) : null;
}
-
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Paragraph.java b/commonmark/src/main/java/org/commonmark/node/Paragraph.java
index 176eaaa76..b298f1ce4 100644
--- a/commonmark/src/main/java/org/commonmark/node/Paragraph.java
+++ b/commonmark/src/main/java/org/commonmark/node/Paragraph.java
@@ -2,6 +2,8 @@
/**
* A paragraph block, contains inline nodes such as {@link Text}
+ *
+ * @see CommonMark Spec
*/
public class Paragraph extends Block {
diff --git a/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java b/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java
index e66458912..87445db56 100644
--- a/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java
+++ b/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java
@@ -1,5 +1,14 @@
package org.commonmark.node;
+/**
+ * A soft line break (as opposed to a {@link HardLineBreak}), e.g. between:
+ *
+ * foo
+ * bar
+ *
+ *
+ * @see CommonMark Spec
+ */
public class SoftLineBreak extends Node {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/SourceSpan.java b/commonmark/src/main/java/org/commonmark/node/SourceSpan.java
index f7dbabc27..6558cc84a 100644
--- a/commonmark/src/main/java/org/commonmark/node/SourceSpan.java
+++ b/commonmark/src/main/java/org/commonmark/node/SourceSpan.java
@@ -27,32 +27,64 @@ public class SourceSpan {
private final int lineIndex;
private final int columnIndex;
+ private final int inputIndex;
private final int length;
+ public static SourceSpan of(int line, int col, int input, int length) {
+ return new SourceSpan(line, col, input, length);
+ }
+
+ /**
+ * @deprecated Use {@link #of(int, int, int, int)} instead to also specify input index. Using the deprecated one
+ * will set {@link #inputIndex} to 0.
+ */
+ @Deprecated
public static SourceSpan of(int lineIndex, int columnIndex, int length) {
- return new SourceSpan(lineIndex, columnIndex, length);
+ return of(lineIndex, columnIndex, 0, length);
}
- private SourceSpan(int lineIndex, int columnIndex, int length) {
+ private SourceSpan(int lineIndex, int columnIndex, int inputIndex, int length) {
+ if (lineIndex < 0) {
+ throw new IllegalArgumentException("lineIndex " + lineIndex + " must be >= 0");
+ }
+ if (columnIndex < 0) {
+ throw new IllegalArgumentException("columnIndex " + columnIndex + " must be >= 0");
+ }
+ if (inputIndex < 0) {
+ throw new IllegalArgumentException("inputIndex " + inputIndex + " must be >= 0");
+ }
+ if (length < 0) {
+ throw new IllegalArgumentException("length " + length + " must be >= 0");
+ }
this.lineIndex = lineIndex;
this.columnIndex = columnIndex;
+ this.inputIndex = inputIndex;
this.length = length;
}
/**
- * @return 0-based index of line in source
+ * @return 0-based line index, e.g. 0 for first line, 1 for the second line, etc
*/
public int getLineIndex() {
return lineIndex;
}
/**
- * @return 0-based index of column (character on line) in source
+ * @return 0-based index of column (character on line) in source, e.g. 0 for the first character of a line, 1 for
+ * the second character, etc
*/
public int getColumnIndex() {
return columnIndex;
}
+ /**
+ * @return 0-based index in whole input
+ * @since 0.24.0
+ */
+ public int getInputIndex() {
+ return inputIndex;
+ }
+
/**
* @return length of the span in characters
*/
@@ -60,6 +92,32 @@ public int getLength() {
return length;
}
+ public SourceSpan subSpan(int beginIndex) {
+ return subSpan(beginIndex, length);
+ }
+
+    /**
+     * Create a span for a part of this span. Both indexes are relative to the start of this span.
+     *
+     * @param beginIndex start of the sub span (inclusive), must satisfy {@code 0 <= beginIndex <= endIndex}
+     * @param endIndex end of the sub span (exclusive), must satisfy {@code endIndex <= getLength()}
+     * @return this span if the range covers it completely, otherwise a new span on the same line whose column index
+     * and input index are shifted by {@code beginIndex} and whose length is {@code endIndex - beginIndex}
+     * @throws IndexOutOfBoundsException if either index is negative or greater than the length, or if
+     * {@code beginIndex} is greater than {@code endIndex}
+     */
+    public SourceSpan subSpan(int beginIndex, int endIndex) {
+        if (beginIndex < 0) {
+            throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be >= 0");
+        }
+        if (beginIndex > length) {
+            throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= length " + length);
+        }
+        if (endIndex < 0) {
+            throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be >= 0");
+        }
+        if (endIndex > length) {
+            throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be <= length " + length);
+        }
+        if (beginIndex > endIndex) {
+            throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= endIndex " + endIndex);
+        }
+        // Whole range requested: no need to allocate an identical span
+        if (beginIndex == 0 && endIndex == length) {
+            return this;
+        }
+        return new SourceSpan(lineIndex, columnIndex + beginIndex, inputIndex + beginIndex, endIndex - beginIndex);
+    }
+
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -71,12 +129,13 @@ public boolean equals(Object o) {
SourceSpan that = (SourceSpan) o;
return lineIndex == that.lineIndex &&
columnIndex == that.columnIndex &&
+ inputIndex == that.inputIndex &&
length == that.length;
}
@Override
public int hashCode() {
- return Objects.hash(lineIndex, columnIndex, length);
+ return Objects.hash(lineIndex, columnIndex, inputIndex, length);
}
@Override
@@ -84,6 +143,7 @@ public String toString() {
return "SourceSpan{" +
"line=" + lineIndex +
", column=" + columnIndex +
+ ", input=" + inputIndex +
", length=" + length +
"}";
}
diff --git a/commonmark/src/main/java/org/commonmark/node/SourceSpans.java b/commonmark/src/main/java/org/commonmark/node/SourceSpans.java
index 3ab29f536..975d7fbdb 100644
--- a/commonmark/src/main/java/org/commonmark/node/SourceSpans.java
+++ b/commonmark/src/main/java/org/commonmark/node/SourceSpans.java
@@ -1,7 +1,6 @@
package org.commonmark.node;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
/**
@@ -18,7 +17,7 @@ public static SourceSpans empty() {
}
public List getSourceSpans() {
- return sourceSpans != null ? sourceSpans : Collections.emptyList();
+ return sourceSpans != null ? sourceSpans : List.of();
}
public void addAllFrom(Iterable extends Node> nodes) {
@@ -42,8 +41,8 @@ public void addAll(List other) {
int lastIndex = sourceSpans.size() - 1;
SourceSpan a = sourceSpans.get(lastIndex);
SourceSpan b = other.get(0);
- if (a.getLineIndex() == b.getLineIndex() && a.getColumnIndex() + a.getLength() == b.getColumnIndex()) {
- sourceSpans.set(lastIndex, SourceSpan.of(a.getLineIndex(), a.getColumnIndex(), a.getLength() + b.getLength()));
+ if (a.getInputIndex() + a.getLength() == b.getInputIndex()) {
+ sourceSpans.set(lastIndex, SourceSpan.of(a.getLineIndex(), a.getColumnIndex(), a.getInputIndex(), a.getLength() + b.getLength()));
sourceSpans.addAll(other.subList(1, other.size()));
} else {
sourceSpans.addAll(other);
diff --git a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
index dbff571cd..0dbeed3df 100644
--- a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
+++ b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
@@ -1,5 +1,13 @@
package org.commonmark.node;
+/**
+ * Strong emphasis, e.g.:
+ *
+ * Some **strong emphasis** or __strong emphasis__
+ *
+ *
+ * @see CommonMark Spec: Emphasis and strong emphasis
+ */
public class StrongEmphasis extends Node implements Delimited {
private String delimiter;
diff --git a/commonmark/src/main/java/org/commonmark/node/Text.java b/commonmark/src/main/java/org/commonmark/node/Text.java
index f16fc907b..9a04c41c1 100644
--- a/commonmark/src/main/java/org/commonmark/node/Text.java
+++ b/commonmark/src/main/java/org/commonmark/node/Text.java
@@ -1,5 +1,15 @@
package org.commonmark.node;
+/**
+ * A text node, e.g. in:
+ *
+ * foo *bar*
+ *
+ *
+ * The foo is a text node, and the bar inside the emphasis is also a text node.
+ *
+ * @see CommonMark Spec
+ */
public class Text extends Node {
private String literal;
diff --git a/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java b/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java
index f81abaa31..a31131e07 100644
--- a/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java
+++ b/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java
@@ -1,9 +1,34 @@
package org.commonmark.node;
+/**
+ * A thematic break, e.g. between text:
+ *
+ * Some text
+ *
+ * ___
+ *
+ * Some other text.
+ *
+ *
+ * @see CommonMark Spec
+ */
public class ThematicBreak extends Block {
+ private String literal;
+
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+
+ /**
+ * @return the source literal that represents this node, if available
+ */
+ public String getLiteral() {
+ return literal;
+ }
+
+ public void setLiteral(String literal) {
+ this.literal = literal;
+ }
}
diff --git a/commonmark/src/main/java/org/commonmark/package-info.java b/commonmark/src/main/java/org/commonmark/package-info.java
index e784703e9..b683017f6 100644
--- a/commonmark/src/main/java/org/commonmark/package-info.java
+++ b/commonmark/src/main/java/org/commonmark/package-info.java
@@ -4,6 +4,7 @@
*
{@link org.commonmark.parser} for parsing input text to AST nodes
*
{@link org.commonmark.node} for AST node types and visitors
*
{@link org.commonmark.renderer.html} for HTML rendering
+ *
{@link org.commonmark.renderer.markdown} for Markdown rendering
*
*/
package org.commonmark;
diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java
index dae96e2c8..12007610b 100644
--- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java
+++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java
@@ -1,9 +1,12 @@
package org.commonmark.parser;
import org.commonmark.node.LinkReferenceDefinition;
+import org.commonmark.parser.beta.LinkProcessor;
+import org.commonmark.parser.beta.InlineContentParserFactory;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import java.util.List;
+import java.util.Set;
/**
* Context for inline parsing.
@@ -11,17 +14,47 @@
public interface InlineParserContext {
/**
- * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)}
+ * @return custom inline content parsers that have been configured with
+ * {@link Parser.Builder#customInlineContentParserFactory(InlineContentParserFactory)}
+ */
+ List getCustomInlineContentParserFactories();
+
+ /**
+ * @return custom delimiter processors that have been configured with
+ * {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)}
*/
List getCustomDelimiterProcessors();
+ /**
+ * @return custom link processors that have been configured with {@link Parser.Builder#linkProcessor}.
+ */
+ List getCustomLinkProcessors();
+
+ /**
+ * @return custom link markers that have been configured with {@link Parser.Builder#linkMarker}.
+ */
+ Set getCustomLinkMarkers();
+
/**
* Look up a {@link LinkReferenceDefinition} for a given label.
*
- * Note that the label is not normalized yet; implementations are responsible for normalizing before lookup.
+ * Note that the passed in label does not need to be normalized; implementations are responsible for doing the
+ * normalization before lookup.
*
* @param label the link label to look up
* @return the definition if one exists, {@code null} otherwise
+ * @deprecated use {@link #getDefinition} with {@link LinkReferenceDefinition} instead
*/
+ @Deprecated
LinkReferenceDefinition getLinkReferenceDefinition(String label);
+
+    /**
+     * Look up a definition of a type for a given label.
+     * <p>
+     * Note that the passed in label does not need to be normalized; implementations are responsible for doing the
+     * normalization before lookup.
+     *
+     * @param type the class of the definition to look up
+     * @param label the label to look up
+     * @param <D> the type of the definition
+     * @return the definition if one exists, null otherwise
+     */
+    <D> D getDefinition(Class<D> type, String label);
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java
index 34c384a8a..c1640e9d8 100644
--- a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java
+++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java
@@ -4,5 +4,9 @@
* Factory for custom inline parser.
*/
public interface InlineParserFactory {
+
+ /**
+ * Create an {@link InlineParser} to use for parsing inlines. This is called once per parsed document.
+ */
InlineParser create(InlineParserContext inlineParserContext);
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java
index 63cebb2eb..8faac789b 100644
--- a/commonmark/src/main/java/org/commonmark/parser/Parser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java
@@ -1,19 +1,21 @@
package org.commonmark.parser;
import org.commonmark.Extension;
+import org.commonmark.internal.Definitions;
import org.commonmark.internal.DocumentParser;
import org.commonmark.internal.InlineParserContextImpl;
import org.commonmark.internal.InlineParserImpl;
-import org.commonmark.internal.LinkReferenceDefinitions;
import org.commonmark.node.*;
+import org.commonmark.parser.beta.LinkInfo;
+import org.commonmark.parser.beta.LinkProcessor;
+import org.commonmark.parser.beta.InlineContentParserFactory;
+import org.commonmark.parser.beta.LinkResult;
import org.commonmark.parser.block.BlockParserFactory;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import java.io.IOException;
import java.io.Reader;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
/**
@@ -28,21 +30,31 @@
public class Parser {
private final List blockParserFactories;
+ private final List inlineContentParserFactories;
private final List delimiterProcessors;
+ private final List linkProcessors;
+ private final Set linkMarkers;
private final InlineParserFactory inlineParserFactory;
private final List postProcessors;
private final IncludeSourceSpans includeSourceSpans;
+ private final int maxOpenBlockParsers;
private Parser(Builder builder) {
this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes);
this.inlineParserFactory = builder.getInlineParserFactory();
this.postProcessors = builder.postProcessors;
+ this.inlineContentParserFactories = builder.inlineContentParserFactories;
this.delimiterProcessors = builder.delimiterProcessors;
+ this.linkProcessors = builder.linkProcessors;
+ this.linkMarkers = builder.linkMarkers;
this.includeSourceSpans = builder.includeSourceSpans;
+ this.maxOpenBlockParsers = builder.maxOpenBlockParsers;
// Try to construct an inline parser. Invalid configuration might result in an exception, which we want to
// detect as soon as possible.
- this.inlineParserFactory.create(new InlineParserContextImpl(delimiterProcessors, new LinkReferenceDefinitions()));
+ var context = new InlineParserContextImpl(
+ inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, new Definitions());
+ this.inlineParserFactory.create(context);
}
/**
@@ -63,9 +75,7 @@ public static Builder builder() {
* @return the root node
*/
public Node parse(String input) {
- if (input == null) {
- throw new NullPointerException("input must not be null");
- }
+ Objects.requireNonNull(input, "input must not be null");
DocumentParser documentParser = createDocumentParser();
Node document = documentParser.parse(input);
return postProcess(document);
@@ -90,17 +100,15 @@ public Node parse(String input) {
* @throws IOException when reading throws an exception
*/
public Node parseReader(Reader input) throws IOException {
- if (input == null) {
- throw new NullPointerException("input must not be null");
- }
-
+ Objects.requireNonNull(input, "input must not be null");
DocumentParser documentParser = createDocumentParser();
Node document = documentParser.parse(input);
return postProcess(document);
}
private DocumentParser createDocumentParser() {
- return new DocumentParser(blockParserFactories, inlineParserFactory, delimiterProcessors, includeSourceSpans);
+ return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories,
+ delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers);
}
private Node postProcess(Node document) {
@@ -115,11 +123,15 @@ private Node postProcess(Node document) {
*/
public static class Builder {
private final List blockParserFactories = new ArrayList<>();
+ private final List inlineContentParserFactories = new ArrayList<>();
private final List delimiterProcessors = new ArrayList<>();
+ private final List linkProcessors = new ArrayList<>();
private final List postProcessors = new ArrayList<>();
+ private final Set linkMarkers = new HashSet<>();
private Set> enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes();
private InlineParserFactory inlineParserFactory;
private IncludeSourceSpans includeSourceSpans = IncludeSourceSpans.NONE;
+ private int maxOpenBlockParsers = Integer.MAX_VALUE;
/**
* @return the configured {@link Parser}
@@ -133,9 +145,7 @@ public Parser build() {
* @return {@code this}
*/
public Builder extensions(Iterable extends Extension> extensions) {
- if (extensions == null) {
- throw new NullPointerException("extensions must not be null");
- }
+ Objects.requireNonNull(extensions, "extensions must not be null");
for (Extension extension : extensions) {
if (extension instanceof ParserExtension) {
ParserExtension parserExtension = (ParserExtension) extension;
@@ -164,24 +174,23 @@ public Builder extensions(Iterable extends Extension> extensions) {
* E.g., to only parse headings and lists:
*
*
* @param enabledBlockTypes A list of block nodes the parser will parse.
- * If this list is empty, the parser will not recognize any CommonMark core features.
+ * If this list is empty, the parser will not recognize any CommonMark core features.
* @return {@code this}
*/
public Builder enabledBlockTypes(Set> enabledBlockTypes) {
- if (enabledBlockTypes == null) {
- throw new NullPointerException("enabledBlockTypes must not be null");
- }
+ Objects.requireNonNull(enabledBlockTypes, "enabledBlockTypes must not be null");
+ DocumentParser.checkEnabledBlockTypes(enabledBlockTypes);
this.enabledBlockTypes = enabledBlockTypes;
return this;
}
/**
- * Whether to calculate {@link org.commonmark.node.SourceSpan} for {@link Node}.
+ * Whether to calculate source positions for parsed {@link Node Nodes}, see {@link Node#getSourceSpans()}.
*
* By default, source spans are disabled.
*
@@ -195,7 +204,28 @@ public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) {
}
/**
- * Adds a custom block parser factory.
+ * Limit how many block parsers may be open at once while parsing.
+ *
+ * Once the limit is reached, additional block starts are treated as plain text instead of
+ * creating deeper nested block structure.
+ *
+ * The document root parser is not counted. The default is unlimited, so callers that keep
+ * using {@code Parser.builder().build()} preserve behavior.
+ *
+ * @param maxOpenBlockParsers maximum number of open non-document block parsers, must be
+ * zero or greater
+ * @return {@code this}
+ */
+ public Builder maxOpenBlockParsers(int maxOpenBlockParsers) {
+ if (maxOpenBlockParsers < 0) {
+ throw new IllegalArgumentException("maxOpenBlockParsers must be >= 0");
+ }
+ this.maxOpenBlockParsers = maxOpenBlockParsers;
+ return this;
+ }
+
+ /**
+ * Add a custom block parser factory.
*
* Note that custom factories are applied before the built-in factories. This is so that
* extensions can change how some syntax is parsed that would otherwise be handled by built-in factories.
@@ -205,35 +235,78 @@ public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) {
* @return {@code this}
*/
public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) {
- if (blockParserFactory == null) {
- throw new NullPointerException("blockParserFactory must not be null");
- }
+ Objects.requireNonNull(blockParserFactory, "blockParserFactory must not be null");
blockParserFactories.add(blockParserFactory);
return this;
}
/**
- * Adds a custom delimiter processor.
+ * Add a factory for a custom inline content parser, for extending inline parsing or overriding built-in parsing.
+ *
+ * Note that parsers are triggered based on a special character as specified by
+ * {@link InlineContentParserFactory#getTriggerCharacters()}. It is possible to register multiple parsers for the same
+ * character, or even for some built-in special character such as {@code `}. The custom parsers are tried first
+ * in order in which they are registered, and then the built-in ones.
+ *
+ * @param inlineContentParserFactory an inline content parser factory implementation, must not be null
+ * @return {@code this}
+ */
+ public Builder customInlineContentParserFactory(InlineContentParserFactory inlineContentParserFactory) {
+ Objects.requireNonNull(inlineContentParserFactory, "inlineContentParserFactory must not be null");
+ inlineContentParserFactories.add(inlineContentParserFactory);
+ return this;
+ }
+
+ /**
+ * Add a custom delimiter processor for inline parsing.
*
* Note that multiple delimiter processors with the same characters can be added, as long as they have a
* different minimum length. In that case, the processor with the shortest matching length is used. Adding more
* than one delimiter processor with the same character and minimum length is invalid.
+ *
+ * If you want more control over how parsing is done, you might want to use
+ * {@link #customInlineContentParserFactory} instead.
*
* @param delimiterProcessor a delimiter processor implementation
* @return {@code this}
*/
public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
- if (delimiterProcessor == null) {
- throw new NullPointerException("delimiterProcessor must not be null");
- }
+ Objects.requireNonNull(delimiterProcessor, "delimiterProcessor must not be null");
delimiterProcessors.add(delimiterProcessor);
return this;
}
+ /**
+ * Add a custom link/image processor for inline parsing.
+ *
+ * Multiple link processors can be added, and will be tried in order in which they were added. If no link
+ * processor applies, the normal behavior applies. That means these can override built-in link parsing.
+ *
+ * @param linkProcessor a link processor implementation
+ * @return {@code this}
+ */
+ public Builder linkProcessor(LinkProcessor linkProcessor) {
+ Objects.requireNonNull(linkProcessor, "linkProcessor must not be null");
+ linkProcessors.add(linkProcessor);
+ return this;
+ }
+
+ /**
+ * Add a custom link marker for link processing. A link marker is a character like {@code !} which, if it
+ * appears before the {@code [} of a link, changes the meaning of the link.
+ *
+ * If a link marker followed by a valid link is parsed, the {@link org.commonmark.parser.beta.LinkInfo}
+ * that is passed to {@link LinkProcessor} will have its {@link LinkInfo#marker()} set. A link processor should
+ * check the marker's {@link Text#getLiteral() literal} and then do any processing, and will probably want to use
+ * {@link LinkResult#includeMarker()}.
+ *
+ * Markers are collected in a set, so adding the same character more than once has no additional effect.
+ *
+ * @param linkMarker a link marker character, must not be null
+ * @return {@code this}
+ */
+ public Builder linkMarker(Character linkMarker) {
+ Objects.requireNonNull(linkMarker, "linkMarker must not be null");
+ linkMarkers.add(linkMarker);
+ return this;
+ }
+
public Builder postProcessor(PostProcessor postProcessor) {
- if (postProcessor == null) {
- throw new NullPointerException("postProcessor must not be null");
- }
+ Objects.requireNonNull(postProcessor, "postProcessor must not be null");
postProcessors.add(postProcessor);
return this;
}
@@ -264,13 +337,9 @@ public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) {
private InlineParserFactory getInlineParserFactory() {
if (inlineParserFactory != null) {
return inlineParserFactory;
+ } else {
+ return InlineParserImpl::new;
}
- return new InlineParserFactory() {
- @Override
- public InlineParser create(InlineParserContext inlineParserContext) {
- return new InlineParserImpl(inlineParserContext);
- }
- };
}
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/SourceLine.java b/commonmark/src/main/java/org/commonmark/parser/SourceLine.java
index 63caceb9e..92a8cdfaf 100644
--- a/commonmark/src/main/java/org/commonmark/parser/SourceLine.java
+++ b/commonmark/src/main/java/org/commonmark/parser/SourceLine.java
@@ -2,6 +2,8 @@
import org.commonmark.node.SourceSpan;
+import java.util.Objects;
+
/**
* A line or part of a line from the input source.
*
@@ -17,10 +19,7 @@ public static SourceLine of(CharSequence content, SourceSpan sourceSpan) {
}
private SourceLine(CharSequence content, SourceSpan sourceSpan) {
- if (content == null) {
- throw new NullPointerException("content must not be null");
- }
- this.content = content;
+ this.content = Objects.requireNonNull(content, "content must not be null");
this.sourceSpan = sourceSpan;
}
@@ -36,10 +35,11 @@ public SourceLine substring(int beginIndex, int endIndex) {
CharSequence newContent = content.subSequence(beginIndex, endIndex);
SourceSpan newSourceSpan = null;
if (sourceSpan != null) {
- int columnIndex = sourceSpan.getColumnIndex() + beginIndex;
int length = endIndex - beginIndex;
if (length != 0) {
- newSourceSpan = SourceSpan.of(sourceSpan.getLineIndex(), columnIndex, length);
+ int columnIndex = sourceSpan.getColumnIndex() + beginIndex;
+ int inputIndex = sourceSpan.getInputIndex() + beginIndex;
+ newSourceSpan = SourceSpan.of(sourceSpan.getLineIndex(), columnIndex, inputIndex, length);
}
}
return SourceLine.of(newContent, newSourceSpan);
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java
new file mode 100644
index 000000000..bc5c9a54f
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java
@@ -0,0 +1,21 @@
+package org.commonmark.parser.beta;
+
+/**
+ * Parser for a type of inline content. Registered via a {@link InlineContentParserFactory} and created by its
+ * {@link InlineContentParserFactory#create() create} method. The lifetime of this is tied to each inline content
+ * snippet that is parsed, as a new instance is created for each.
+ */
+public interface InlineContentParser {
+
+ /**
+ * Try to parse inline content starting from the current position. Note that the character at the current position
+ * is one of {@link InlineContentParserFactory#getTriggerCharacters()} of the factory that created this parser.
+ *
+ * For a given inline content snippet that is being parsed, this method can be called multiple times: each time a
+ * trigger character is encountered.
+ *
+ * @param inlineParserState the current state of the inline parser; its {@link InlineParserState#scanner() scanner}
+ * is positioned on the trigger character
+ * @return the result of parsing; can indicate that this parser is not interested, or that parsing was successful
+ * (results are created with the static methods on {@link ParsedInline})
+ */
+ ParsedInline tryParse(InlineParserState inlineParserState);
+}
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java
new file mode 100644
index 000000000..c86f93a41
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java
@@ -0,0 +1,24 @@
+package org.commonmark.parser.beta;
+
+import java.util.Set;
+
+/**
+ * A factory for extending inline content parsing.
+ *
+ * See {@link org.commonmark.parser.Parser.Builder#customInlineContentParserFactory} for how to register it.
+ */
+public interface InlineContentParserFactory {
+
+ /**
+ * An inline content parser needs to have a special "trigger" character which activates it. When this character is
+ * encountered during inline parsing, {@link InlineContentParser#tryParse} is called with the current parser state.
+ * It can also register for more than one trigger character.
+ *
+ * @return the characters that trigger the parsers created by this factory
+ */
+ Set getTriggerCharacters();
+
+ /**
+ * Create an {@link InlineContentParser} that will do the parsing. This method is called once per text snippet of
+ * inline content inside block structures; {@link InlineContentParser#tryParse} is then called on the returned
+ * parser each time a trigger character is encountered in that snippet.
+ *
+ * @return a new parser instance for one snippet of inline content
+ */
+ InlineContentParser create();
+}
diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java
similarity index 74%
rename from commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java
rename to commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java
index f6cb6bf49..e434d45d6 100644
--- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java
@@ -1,10 +1,10 @@
-package org.commonmark.internal.inline;
+package org.commonmark.parser.beta;
public interface InlineParserState {
/**
- * Return a scanner for the input for the current position (on the character that the inline parser registered
- * interest for).
+ * Return a scanner for the input for the current position (on the trigger character that the inline parser was
+ * added for).
*
* Note that this always returns the same instance, if you want to backtrack you need to use
* {@link Scanner#position()} and {@link Scanner#setPosition(Position)}.
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/LinkInfo.java b/commonmark/src/main/java/org/commonmark/parser/beta/LinkInfo.java
new file mode 100644
index 000000000..b2fda57e4
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/LinkInfo.java
@@ -0,0 +1,69 @@
+package org.commonmark.parser.beta;
+
+import org.commonmark.node.Text;
+
+/**
+ * A parsed link/image. There are different types of links.
+ *
+ * Inline links, which carry their own {@link #destination()} and optional {@link #title()}:
+ *
+ * [text](destination)
+ * [text](destination "title")
+ *
+ * Reference links, which have different subtypes. Full:
+ *
+ * [text][label]
+ *
+ * Collapsed (label is ""):
+ *
+ * [text][]
+ *
+ * Shortcut (label is null):
+ *
+ * [text]
+ *
+ * Images use the same syntax as links but with a {@code !} {@link #marker()} in front, e.g. {@code ![text](destination)}.
+ */
+public interface LinkInfo {
+
+ /**
+ * The marker if present, or null. A marker is e.g. {@code !} for an image, or a custom marker as specified in
+ * {@link org.commonmark.parser.Parser.Builder#linkMarker}.
+ */
+ Text marker();
+
+ /**
+ * The text node of the opening bracket {@code [}.
+ */
+ Text openingBracket();
+
+ /**
+ * The text between the first brackets, e.g. {@code foo} in {@code [foo][bar]}.
+ */
+ String text();
+
+ /**
+ * The label, or null for inline links or for shortcut links (in which case {@link #text()} should be used as the label).
+ */
+ String label();
+
+ /**
+ * The destination if available, e.g. in {@code [foo](destination)}, or null.
+ */
+ String destination();
+
+ /**
+ * The title if available, e.g. in {@code [foo](destination "title")}, or null.
+ */
+ String title();
+
+ /**
+ * The position after the closing text bracket, e.g.:
+ *
+ * [foo][bar]
+ *      ^
+ *
+ */
+ Position afterTextBracket();
+}
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/LinkProcessor.java b/commonmark/src/main/java/org/commonmark/parser/beta/LinkProcessor.java
new file mode 100644
index 000000000..3e448fd91
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/LinkProcessor.java
@@ -0,0 +1,40 @@
+package org.commonmark.parser.beta;
+
+import org.commonmark.parser.InlineParserContext;
+
+/**
+ * An interface to decide how links/images are handled.
+ *
+ * Implementations need to be registered with a parser via {@link org.commonmark.parser.Parser.Builder#linkProcessor}.
+ * Then, when inline parsing is run, each parsed link/image is passed to the processor. This includes links like these:
+ * {@code [text](destination)}, {@code [text][label]}, {@code [text][]} and {@code [text]}, as well as the same forms
+ * with a marker such as {@code !} in front (images).
+ *
+ * See {@link LinkInfo} for accessing various parts of the parsed link/image.
+ *
+ * The processor can then inspect the link/image and decide what to do with it by returning the appropriate
+ * {@link LinkResult}. If it returns {@link LinkResult#none()}, the next registered processor is tried. If none of them
+ * apply, the link is handled as it normally would.
+ */
+public interface LinkProcessor {
+
+ /**
+ * Decide what to do with a parsed link/image.
+ *
+ * @param linkInfo information about the parsed link/image
+ * @param scanner the scanner at the current position after the parsed link/image
+ * @param context context for inline parsing
+ * @return what to do with the link/image, e.g. do nothing (try the next processor), wrap the text in a node, or
+ * replace the link/image with a node
+ */
+ LinkResult process(LinkInfo linkInfo, Scanner scanner, InlineParserContext context);
+}
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/LinkResult.java b/commonmark/src/main/java/org/commonmark/parser/beta/LinkResult.java
new file mode 100644
index 000000000..43bc82af8
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/LinkResult.java
@@ -0,0 +1,50 @@
+package org.commonmark.parser.beta;
+
+import org.commonmark.internal.inline.LinkResultImpl;
+import org.commonmark.node.Node;
+
+/**
+ * What to do with a link/image processed by {@link LinkProcessor}. Instances are created with the static methods
+ * of this interface.
+ */
+public interface LinkResult {
+ /**
+ * Link not handled by processor. As described on {@link LinkProcessor}, the next registered processor is then
+ * tried.
+ *
+ * @return the "not handled" result (currently {@code null})
+ */
+ static LinkResult none() {
+ return null;
+ }
+
+ /**
+ * Wrap the link text in a node. This is the normal behavior for links, e.g. for this:
+ *
+ * [my *text*](destination)
+ *
+ * The text is {@code my *text*}, a text node and emphasis. The text is wrapped in a
+ * {@link org.commonmark.node.Link} node, which means the text is added as child nodes to it.
+ *
+ * @param node the node to which the link text nodes will be added as child nodes
+ * @param position the position to continue parsing from
+ * @return a result that wraps the link text in {@code node}
+ */
+ static LinkResult wrapTextIn(Node node, Position position) {
+ return new LinkResultImpl(LinkResultImpl.Type.WRAP, node, position);
+ }
+
+ /**
+ * Replace the link with a node. E.g. for this:
+ *
+ * [^foo]
+ *
+ * The processor could decide to create a {@code FootnoteReference} node instead which replaces the link.
+ *
+ * @param node the node to replace the link with
+ * @param position the position to continue parsing from
+ * @return a result that replaces the link with {@code node}
+ */
+ static LinkResult replaceWith(Node node, Position position) {
+ return new LinkResultImpl(LinkResultImpl.Type.REPLACE, node, position);
+ }
+
+ /**
+ * If a {@link LinkInfo#marker()} is present, include it in processing (i.e. treat it the same way as the brackets).
+ *
+ * @return a link result, so that the call can be chained onto {@link #wrapTextIn} or {@link #replaceWith}
+ */
+ LinkResult includeMarker();
+}
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java
new file mode 100644
index 000000000..5d1402cae
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java
@@ -0,0 +1,24 @@
+package org.commonmark.parser.beta;
+
+import org.commonmark.internal.inline.ParsedInlineImpl;
+import org.commonmark.node.Node;
+
+import java.util.Objects;
+
+/**
+ * The result of a single inline parser. Use the static methods to create instances.
+ *
+ * This interface is not intended to be implemented by clients.
+ */
+public interface ParsedInline {
+
+ /**
+ * Result for when nothing was parsed.
+ *
+ * @return the "nothing parsed" result (currently {@code null})
+ */
+ static ParsedInline none() {
+ return null;
+ }
+
+ /**
+ * Result for when inline content was parsed into a node.
+ *
+ * @param node the node for the parsed inline content, must not be null
+ * @param position a {@link Position} within the scanner, must not be null
+ * @return the result holding the node and the position
+ * @throws NullPointerException if {@code node} or {@code position} is null
+ */
+ static ParsedInline of(Node node, Position position) {
+ Objects.requireNonNull(node, "node must not be null");
+ Objects.requireNonNull(position, "position must not be null");
+ return new ParsedInlineImpl(node, position);
+ }
+}
diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/Position.java b/commonmark/src/main/java/org/commonmark/parser/beta/Position.java
similarity index 89%
rename from commonmark/src/main/java/org/commonmark/internal/inline/Position.java
rename to commonmark/src/main/java/org/commonmark/parser/beta/Position.java
index 5f06a063a..3dbb4870f 100644
--- a/commonmark/src/main/java/org/commonmark/internal/inline/Position.java
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/Position.java
@@ -1,4 +1,4 @@
-package org.commonmark.internal.inline;
+package org.commonmark.parser.beta;
/**
* Position within a {@link Scanner}. This is intentionally kept opaque so as not to expose the internal structure of
diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/Scanner.java b/commonmark/src/main/java/org/commonmark/parser/beta/Scanner.java
similarity index 97%
rename from commonmark/src/main/java/org/commonmark/internal/inline/Scanner.java
rename to commonmark/src/main/java/org/commonmark/parser/beta/Scanner.java
index 9de96a587..324639493 100644
--- a/commonmark/src/main/java/org/commonmark/internal/inline/Scanner.java
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/Scanner.java
@@ -1,9 +1,9 @@
-package org.commonmark.internal.inline;
+package org.commonmark.parser.beta;
-import org.commonmark.internal.util.CharMatcher;
import org.commonmark.node.SourceSpan;
import org.commonmark.parser.SourceLine;
import org.commonmark.parser.SourceLines;
+import org.commonmark.text.CharMatcher;
import java.util.List;
@@ -244,7 +244,7 @@ public SourceLines getSource(Position begin, Position end) {
SourceSpan newSourceSpan = null;
SourceSpan sourceSpan = line.getSourceSpan();
if (sourceSpan != null) {
- newSourceSpan = SourceSpan.of(sourceSpan.getLineIndex(), sourceSpan.getColumnIndex() + begin.index, newContent.length());
+ newSourceSpan = sourceSpan.subSpan(begin.index, end.index);
}
return SourceLines.of(SourceLine.of(newContent, newSourceSpan));
} else {
diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/package-info.java b/commonmark/src/main/java/org/commonmark/parser/beta/package-info.java
new file mode 100644
index 000000000..029d80507
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/beta/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Experimental APIs to use for extensions. APIs are subject to change if necessary.
+ */
+package org.commonmark.parser.beta;
diff --git a/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java
index 3d4cbb77b..4fb1a05ac 100644
--- a/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/block/AbstractBlockParser.java
@@ -1,10 +1,13 @@
package org.commonmark.parser.block;
import org.commonmark.node.Block;
+import org.commonmark.node.DefinitionMap;
import org.commonmark.node.SourceSpan;
import org.commonmark.parser.InlineParser;
import org.commonmark.parser.SourceLine;
+import java.util.List;
+
public abstract class AbstractBlockParser implements BlockParser {
@Override
@@ -31,6 +34,11 @@ public void addSourceSpan(SourceSpan sourceSpan) {
getBlock().addSourceSpan(sourceSpan);
}
+ @Override
+ public List> getDefinitions() {
+ return List.of();
+ }
+
@Override
public void closeBlock() {
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java
index aa956a48a..32ff2a474 100644
--- a/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/block/BlockParser.java
@@ -1,10 +1,13 @@
package org.commonmark.parser.block;
import org.commonmark.node.Block;
+import org.commonmark.node.DefinitionMap;
import org.commonmark.node.SourceSpan;
import org.commonmark.parser.InlineParser;
import org.commonmark.parser.SourceLine;
+import java.util.List;
+
/**
* Parser for a specific block node.
*
@@ -34,6 +37,10 @@ public interface BlockParser {
BlockContinue tryContinue(ParserState parserState);
+ /**
+ * Add the part of a line that belongs to this block parser to parse (i.e. without any container block markers).
+ * Note that the line will only include a {@link SourceLine#getSourceSpan()} if source spans are enabled for inlines.
+ */
void addLine(SourceLine line);
/**
@@ -45,6 +52,12 @@ public interface BlockParser {
*/
void addSourceSpan(SourceSpan sourceSpan);
+ /**
+ * Return definitions parsed by this parser. The definitions returned here can later be accessed during inline
+ * parsing via {@link org.commonmark.parser.InlineParserContext#getDefinition}.
+ */
+ List> getDefinitions();
+
void closeBlock();
void parseInlines(InlineParser inlineParser);
diff --git a/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java b/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java
index d9e7a2b49..c41f1caa3 100644
--- a/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java
+++ b/commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java
@@ -10,18 +10,59 @@ public abstract class BlockStart {
protected BlockStart() {
}
+ /**
+ * Result for when there is no block start.
+ */
public static BlockStart none() {
return null;
}
+ /**
+ * Start block(s) with the specified parser(s).
+ */
public static BlockStart of(BlockParser... blockParsers) {
return new BlockStartImpl(blockParsers);
}
+ /**
+ * Continue parsing at the specified index.
+ *
+ * @param newIndex the new index, see {@link ParserState#getIndex()}
+ */
public abstract BlockStart atIndex(int newIndex);
+ /**
+ * Continue parsing at the specified column (for tab handling).
+ *
+ * @param newColumn the new column, see {@link ParserState#getColumn()}
+ */
public abstract BlockStart atColumn(int newColumn);
+ /**
+ * @deprecated use {@link #replaceParagraphLines(int)} instead; please raise an issue if that doesn't work for you
+ * for some reason.
+ */
+ @Deprecated
public abstract BlockStart replaceActiveBlockParser();
+ /**
+ * Replace a number of lines from the current paragraph (as returned by
+ * {@link MatchedBlockParser#getParagraphLines()}) with the new block.
+ *
+ * This is useful for parsing blocks that start with normal paragraphs and only have special marker syntax in later
+ * lines, e.g. in this:
+ *
+ * Foo
+ * ===
+ *
+ * The Foo line is initially parsed as a normal paragraph, then === is parsed as a heading
+ * marker, replacing the 1 paragraph line before. The end result is a single Heading block.
+ *
+ * Note that source spans from the replaced lines are automatically added to the new block.
+ *
+ * @param lines the number of lines to replace (at least 1); use {@link Integer#MAX_VALUE} to replace the whole
+ * paragraph
+ */
+ public abstract BlockStart replaceParagraphLines(int lines);
+
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java b/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java
index 1f2bcfb2a..c4619d8c2 100644
--- a/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java
@@ -12,7 +12,8 @@ public interface MatchedBlockParser {
BlockParser getMatchedBlockParser();
/**
- * Returns the current paragraph lines if the matched block is a paragraph.
+ * Returns the current paragraph lines if the matched block is a paragraph. If you want to use some or all of the
+ * lines for starting a new block instead, use {@link BlockStart#replaceParagraphLines(int)}.
*
* @return paragraph content or an empty list
*/
diff --git a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java
index 897943d66..3b6abf214 100644
--- a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java
+++ b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java
@@ -6,6 +6,8 @@
* Custom delimiter processor for additional delimiters besides {@code _} and {@code *}.
*
* Note that implementations of this need to be thread-safe, the same instance may be used by multiple parsers.
+ *
+ * @see org.commonmark.parser.beta.InlineContentParserFactory
*/
public interface DelimiterProcessor {
diff --git a/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java
index e2d5ebc96..4ae4b5dcd 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/NodeRenderer.java
@@ -20,4 +20,20 @@ public interface NodeRenderer {
* @param node the node to render, will be an instance of one of {@link #getNodeTypes()}
*/
void render(Node node);
+
+ /**
+ * Called before the root node is rendered, to do any initial processing at the start.
+ *
+ * @param rootNode the root (top-level) node
+ */
+ default void beforeRoot(Node rootNode) {
+ }
+
+ /**
+ * Called after the root node is rendered, to do any final processing at the end.
+ *
+ * @param rootNode the root (top-level) node
+ */
+ default void afterRoot(Node rootNode) {
+ }
}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java
index 7d3552668..5c536558e 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java
@@ -3,7 +3,9 @@
import org.commonmark.node.*;
import org.commonmark.renderer.NodeRenderer;
-import java.util.*;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
/**
* The node renderer that renders all the core nodes (comes last in the order of node renderers).
@@ -20,7 +22,7 @@ public CoreHtmlNodeRenderer(HtmlNodeRendererContext context) {
@Override
public Set> getNodeTypes() {
- return new HashSet<>(Arrays.asList(
+ return Set.of(
Document.class,
Heading.class,
Paragraph.class,
@@ -41,7 +43,7 @@ public Set> getNodeTypes() {
HtmlInline.class,
SoftLineBreak.class,
HardLineBreak.class
- ));
+ );
}
@Override
@@ -67,13 +69,15 @@ public void visit(Heading heading) {
@Override
public void visit(Paragraph paragraph) {
- boolean inTightList = isInTightList(paragraph);
- if (!inTightList) {
+ boolean omitP = isInTightList(paragraph) || //
+ (context.shouldOmitSingleParagraphP() && paragraph.getParent() instanceof Document && //
+ paragraph.getPrevious() == null && paragraph.getNext() == null);
+ if (!omitP) {
html.line();
html.tag("p", getAttrs(paragraph, "p"));
}
visitChildren(paragraph);
- if (!inTightList) {
+ if (!omitP) {
html.tag("/p");
html.line();
}
@@ -135,7 +139,7 @@ public void visit(ThematicBreak thematicBreak) {
@Override
public void visit(IndentedCodeBlock indentedCodeBlock) {
- renderCodeBlock(indentedCodeBlock.getLiteral(), indentedCodeBlock, Collections.emptyMap());
+ renderCodeBlock(indentedCodeBlock.getLiteral(), indentedCodeBlock, Map.of());
}
@Override
@@ -168,7 +172,7 @@ public void visit(ListItem listItem) {
@Override
public void visit(OrderedList orderedList) {
- int start = orderedList.getStartNumber();
+ int start = orderedList.getMarkerStartNumber() != null ? orderedList.getMarkerStartNumber() : 1;
Map attrs = new LinkedHashMap<>();
if (start != 1) {
attrs.put("start", String.valueOf(start));
@@ -287,7 +291,7 @@ private boolean isInTightList(Paragraph paragraph) {
}
private Map getAttrs(Node node, String tagName) {
- return getAttrs(node, tagName, Collections.emptyMap());
+ return getAttrs(node, tagName, Map.of());
}
private Map getAttrs(Node node, String tagName, Map defaultAttributes) {
@@ -307,6 +311,11 @@ public void visit(Text text) {
sb.append(text.getLiteral());
}
+ @Override
+ public void visit(Code code) {
+ sb.append(code.getLiteral());
+ }
+
@Override
public void visit(SoftLineBreak softLineBreak) {
sb.append('\n');
diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java b/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java
index 6cc96c5e7..4c5bed12c 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java
@@ -1,13 +1,10 @@
package org.commonmark.renderer.html;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.*;
/**
*
- * Allows http, https and mailto protocols for url.
+ * Allows http, https, mailto, and data protocols for url.
* Also allows protocol relative urls, and relative urls.
* Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java
*/
@@ -15,7 +12,7 @@ public class DefaultUrlSanitizer implements UrlSanitizer {
private Set protocols;
public DefaultUrlSanitizer() {
- this(Arrays.asList("http", "https", "mailto"));
+ this(List.of("http", "https", "mailto", "data"));
}
public DefaultUrlSanitizer(Collection protocols) {
diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java
index eb950ffa6..eecff0f44 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java
@@ -17,8 +17,8 @@ public interface HtmlNodeRendererContext {
/**
* Let extensions modify the HTML tag attributes.
*
- * @param node the node for which the attributes are applied
- * @param tagName the HTML tag name that these attributes are for (e.g. {@code h1}, {@code pre}, {@code code}).
+ * @param node the node for which the attributes are applied
+ * @param tagName the HTML tag name that these attributes are for (e.g. {@code h1}, {@code pre}, {@code code}).
* @param attributes the attributes that were calculated by the renderer
* @return the extended attributes with added/updated/removed entries
*/
@@ -47,6 +47,11 @@ public interface HtmlNodeRendererContext {
*/
boolean shouldEscapeHtml();
+ /**
+ * @return whether documents that only contain a single paragraph should be rendered without the {@code <p>} tag
+ */
+ boolean shouldOmitSingleParagraphP();
+
/**
* @return true if the {@link UrlSanitizer} should be used.
* @since 0.14.0
diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java
index 19f53594f..386abebf0 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java
@@ -7,10 +7,7 @@
import org.commonmark.renderer.NodeRenderer;
import org.commonmark.renderer.Renderer;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
/**
* Renders a tree of nodes to HTML.
@@ -25,17 +22,19 @@ public class HtmlRenderer implements Renderer {
private final String softbreak;
private final boolean escapeHtml;
+ private final boolean percentEncodeUrls;
+ private final boolean omitSingleParagraphP;
private final boolean sanitizeUrls;
private final UrlSanitizer urlSanitizer;
- private final boolean percentEncodeUrls;
private final List<AttributeProviderFactory> attributeProviderFactories;
private final List<HtmlNodeRendererFactory> nodeRendererFactories;
private HtmlRenderer(Builder builder) {
this.softbreak = builder.softbreak;
this.escapeHtml = builder.escapeHtml;
- this.sanitizeUrls = builder.sanitizeUrls;
this.percentEncodeUrls = builder.percentEncodeUrls;
+ this.omitSingleParagraphP = builder.omitSingleParagraphP;
+ this.sanitizeUrls = builder.sanitizeUrls;
this.urlSanitizer = builder.urlSanitizer;
this.attributeProviderFactories = new ArrayList<>(builder.attributeProviderFactories);
@@ -61,18 +60,16 @@ public static Builder builder() {
@Override
public void render(Node node, Appendable output) {
- if (node == null) {
- throw new NullPointerException("node must not be null");
- }
+ Objects.requireNonNull(node, "node must not be null");
RendererContext context = new RendererContext(new HtmlWriter(output));
+ context.beforeRoot(node);
context.render(node);
+ context.afterRoot(node);
}
@Override
public String render(Node node) {
- if (node == null) {
- throw new NullPointerException("node must not be null");
- }
+ Objects.requireNonNull(node, "node must not be null");
StringBuilder sb = new StringBuilder();
render(node, sb);
return sb.toString();
@@ -88,6 +85,7 @@ public static class Builder {
private boolean sanitizeUrls = false;
private UrlSanitizer urlSanitizer = new DefaultUrlSanitizer();
private boolean percentEncodeUrls = false;
+ private boolean omitSingleParagraphP = false;
private List<AttributeProviderFactory> attributeProviderFactories = new ArrayList<>();
private List<HtmlNodeRendererFactory> nodeRendererFactories = new ArrayList<>();
@@ -171,6 +169,17 @@ public Builder percentEncodeUrls(boolean percentEncodeUrls) {
return this;
}
+ /**
+ * Whether documents that only contain a single paragraph should be rendered without the {@code <p>} tag. Set to
+ * {@code true} to render without the tag; the default of {@code false} always renders the tag.
+ *
+ * @return {@code this}
+ */
+ public Builder omitSingleParagraphP(boolean omitSingleParagraphP) {
+ this.omitSingleParagraphP = omitSingleParagraphP;
+ return this;
+ }
+
/**
* Add a factory for an attribute provider for adding/changing HTML attributes to the rendered tags.
*
@@ -178,9 +187,7 @@ public Builder percentEncodeUrls(boolean percentEncodeUrls) {
* @return {@code this}
*/
public Builder attributeProviderFactory(AttributeProviderFactory attributeProviderFactory) {
- if (attributeProviderFactory == null) {
- throw new NullPointerException("attributeProviderFactory must not be null");
- }
+ Objects.requireNonNull(attributeProviderFactory, "attributeProviderFactory must not be null");
this.attributeProviderFactories.add(attributeProviderFactory);
return this;
}
@@ -196,9 +203,7 @@ public Builder attributeProviderFactory(AttributeProviderFactory attributeProvid
* @return {@code this}
*/
public Builder nodeRendererFactory(HtmlNodeRendererFactory nodeRendererFactory) {
- if (nodeRendererFactory == null) {
- throw new NullPointerException("nodeRendererFactory must not be null");
- }
+ Objects.requireNonNull(nodeRendererFactory, "nodeRendererFactory must not be null");
this.nodeRendererFactories.add(nodeRendererFactory);
return this;
}
@@ -208,9 +213,7 @@ public Builder nodeRendererFactory(HtmlNodeRendererFactory nodeRendererFactory)
* @return {@code this}
*/
public Builder extensions(Iterable<? extends Extension> extensions) {
- if (extensions == null) {
- throw new NullPointerException("extensions must not be null");
- }
+ Objects.requireNonNull(extensions, "extensions must not be null");
for (Extension extension : extensions) {
if (extension instanceof HtmlRendererExtension) {
HtmlRendererExtension htmlRendererExtension = (HtmlRendererExtension) extension;
@@ -238,15 +241,13 @@ private RendererContext(HtmlWriter htmlWriter) {
this.htmlWriter = htmlWriter;
attributeProviders = new ArrayList<>(attributeProviderFactories.size());
- for (AttributeProviderFactory attributeProviderFactory : attributeProviderFactories) {
+ for (var attributeProviderFactory : attributeProviderFactories) {
attributeProviders.add(attributeProviderFactory.create(this));
}
- // The first node renderer for a node type "wins".
- for (int i = nodeRendererFactories.size() - 1; i >= 0; i--) {
- HtmlNodeRendererFactory nodeRendererFactory = nodeRendererFactories.get(i);
- NodeRenderer nodeRenderer = nodeRendererFactory.create(this);
- nodeRendererMap.add(nodeRenderer);
+ for (var factory : nodeRendererFactories) {
+ var renderer = factory.create(this);
+ nodeRendererMap.add(renderer);
}
}
@@ -255,6 +256,11 @@ public boolean shouldEscapeHtml() {
return escapeHtml;
}
+ @Override
+ public boolean shouldOmitSingleParagraphP() {
+ return omitSingleParagraphP;
+ }
+
@Override
public boolean shouldSanitizeUrls() {
return sanitizeUrls;
@@ -296,6 +302,14 @@ public void render(Node node) {
nodeRendererMap.render(node);
}
+ public void beforeRoot(Node node) {
+ nodeRendererMap.beforeRoot(node);
+ }
+
+ public void afterRoot(Node node) {
+ nodeRendererMap.afterRoot(node);
+ }
+
private void setCustomAttributes(Node node, String tagName, Map<String, String> attrs) {
for (AttributeProvider attributeProvider : attributeProviders) {
attributeProvider.setAttributes(node, tagName, attrs);
diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java
index 8c79eb8b4..a4ac05d45 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlWriter.java
@@ -3,20 +3,18 @@
import org.commonmark.internal.util.Escaping;
import java.io.IOException;
-import java.util.Collections;
import java.util.Map;
+import java.util.Objects;
public class HtmlWriter {
- private static final Map<String, String> NO_ATTRIBUTES = Collections.emptyMap();
+ private static final Map<String, String> NO_ATTRIBUTES = Map.of();
private final Appendable buffer;
private char lastChar = 0;
public HtmlWriter(Appendable out) {
- if (out == null) {
- throw new NullPointerException("out must not be null");
- }
+ Objects.requireNonNull(out, "out must not be null");
this.buffer = out;
}
@@ -40,12 +38,14 @@ public void tag(String name, Map attrs, boolean voidElement) {
append("<");
append(name);
if (attrs != null && !attrs.isEmpty()) {
- for (Map.Entry<String, String> attrib : attrs.entrySet()) {
+ for (var attr : attrs.entrySet()) {
append(" ");
- append(Escaping.escapeHtml(attrib.getKey()));
- append("=\"");
- append(Escaping.escapeHtml(attrib.getValue()));
- append("\"");
+ append(Escaping.escapeHtml(attr.getKey()));
+ if (attr.getValue() != null) {
+ append("=\"");
+ append(Escaping.escapeHtml(attr.getValue()));
+ append("\"");
+ }
}
}
if (voidElement) {
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java
new file mode 100644
index 000000000..5a81676f4
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java
@@ -0,0 +1,554 @@
+package org.commonmark.renderer.markdown;
+
+import org.commonmark.node.*;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.text.AsciiMatcher;
+import org.commonmark.text.CharMatcher;
+import org.commonmark.text.Characters;
+
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * The node renderer that renders all the core nodes (comes last in the order of node renderers).
+ * <p>
+ * Note that while sometimes it would be easier to record what kind of syntax was used on parsing (e.g. ATX vs Setext
+ * heading), this renderer is intended to also work for documents that were created by directly creating
+ * {@link Node Nodes} instead. So in order to support that, it sometimes needs to do a bit more work.
+ */
+public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRenderer {
+
+ private final AsciiMatcher textEscape;
+ private final CharMatcher textEscapeInHeading;
+ private final CharMatcher linkDestinationNeedsAngleBrackets =
+ AsciiMatcher.builder().c(' ').c('(').c(')').c('<').c('>').c('\n').c('\\').build();
+ private final CharMatcher linkDestinationEscapeInAngleBrackets =
+ AsciiMatcher.builder().c('<').c('>').c('\n').c('\\').build();
+ private final CharMatcher linkTitleEscapeInQuotes =
+ AsciiMatcher.builder().c('"').c('\n').c('\\').build();
+
+ private final Pattern orderedListMarkerPattern = Pattern.compile("^([0-9]{1,9})([.)])");
+
+ protected final MarkdownNodeRendererContext context;
+ private final MarkdownWriter writer;
+ /**
+ * If we're currently within a {@link BulletList} or {@link OrderedList}, this keeps the context of that list.
+ * It has a parent field so that it can represent a stack (for nested lists).
+ */
+ private ListHolder listHolder;
+
+ public CoreMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+ this.context = context;
+ this.writer = context.getWriter();
+
+ textEscape = AsciiMatcher.builder().anyOf("[]<>`*_&\n\\").anyOf(context.getSpecialCharacters()).build();
+ textEscapeInHeading = AsciiMatcher.builder(textEscape).anyOf("#").build();
+ }
+
+ @Override
+ public Set<Class<? extends Node>> getNodeTypes() {
+ return Set.of(
+ BlockQuote.class,
+ BulletList.class,
+ Code.class,
+ Document.class,
+ Emphasis.class,
+ FencedCodeBlock.class,
+ HardLineBreak.class,
+ Heading.class,
+ HtmlBlock.class,
+ HtmlInline.class,
+ Image.class,
+ IndentedCodeBlock.class,
+ Link.class,
+ ListItem.class,
+ OrderedList.class,
+ Paragraph.class,
+ SoftLineBreak.class,
+ StrongEmphasis.class,
+ Text.class,
+ ThematicBreak.class
+ );
+ }
+
+ @Override
+ public void render(Node node) {
+ node.accept(this);
+ }
+
+ @Override
+ public void visit(Document document) {
+ // No rendering itself
+ visitChildren(document);
+ writer.line();
+ }
+
+ @Override
+ public void visit(ThematicBreak thematicBreak) {
+ String literal = thematicBreak.getLiteral();
+ if (literal == null) {
+ // Let's use ___ as it doesn't introduce ambiguity with * or - list item markers
+ literal = "___";
+ }
+ writer.raw(literal);
+ writer.block();
+ }
+
+ @Override
+ public void visit(Heading heading) {
+ if (heading.getLevel() <= 2) {
+ LineBreakVisitor lineBreakVisitor = new LineBreakVisitor();
+ heading.accept(lineBreakVisitor);
+ boolean isMultipleLines = lineBreakVisitor.hasLineBreak();
+
+ if (isMultipleLines) {
+ // Setext headings: Can have multiple lines, but only level 1 or 2
+ visitChildren(heading);
+ writer.line();
+ if (heading.getLevel() == 1) {
+ // Note that it would be nice to match the length of the contents instead of just using 3, but that's
+ // not easy.
+ writer.raw("===");
+ } else {
+ writer.raw("---");
+ }
+ writer.block();
+ return;
+ }
+ }
+
+ // ATX headings: Can't have multiple lines, but up to level 6.
+ for (int i = 0; i < heading.getLevel(); i++) {
+ writer.raw('#');
+ }
+ writer.raw(' ');
+ visitChildren(heading);
+
+ writer.block();
+ }
+
+ @Override
+ public void visit(IndentedCodeBlock indentedCodeBlock) {
+ String literal = indentedCodeBlock.getLiteral();
+ // We need to respect line prefixes which is why we need to write it line by line (e.g. an indented code block
+ // within a block quote)
+ writer.writePrefix(" ");
+ writer.pushPrefix(" ");
+ List<String> lines = getLines(literal);
+ for (int i = 0; i < lines.size(); i++) {
+ String line = lines.get(i);
+ writer.raw(line);
+ if (i != lines.size() - 1) {
+ writer.line();
+ }
+ }
+ writer.popPrefix();
+ writer.block();
+ }
+
+ @Override
+ public void visit(FencedCodeBlock codeBlock) {
+ String literal = codeBlock.getLiteral();
+ String fenceChar = codeBlock.getFenceCharacter() != null ? codeBlock.getFenceCharacter() : "`";
+ int openingFenceLength;
+ if (codeBlock.getOpeningFenceLength() != null) {
+ // If we have a known fence length, use it
+ openingFenceLength = codeBlock.getOpeningFenceLength();
+ } else {
+ // Otherwise, calculate the closing fence length pessimistically, e.g. if the code block itself contains a
+ // line with ```, we need to use a fence of length 4. If ``` occurs with non-whitespace characters on a
+ // line, we technically don't need a longer fence, but it's not incorrect to do so.
+ int fenceCharsInLiteral = findMaxRunLength(fenceChar, literal);
+ openingFenceLength = Math.max(fenceCharsInLiteral + 1, 3);
+ }
+ int closingFenceLength = codeBlock.getClosingFenceLength() != null ? codeBlock.getClosingFenceLength() : openingFenceLength;
+
+ String openingFence = repeat(fenceChar, openingFenceLength);
+ String closingFence = repeat(fenceChar, closingFenceLength);
+ int indent = codeBlock.getFenceIndent();
+
+ if (indent > 0) {
+ String indentPrefix = repeat(" ", indent);
+ writer.writePrefix(indentPrefix);
+ writer.pushPrefix(indentPrefix);
+ }
+
+ writer.raw(openingFence);
+ if (codeBlock.getInfo() != null) {
+ writer.raw(codeBlock.getInfo());
+ }
+ writer.line();
+ if (!literal.isEmpty()) {
+ List<String> lines = getLines(literal);
+ for (String line : lines) {
+ writer.raw(line);
+ writer.line();
+ }
+ }
+ writer.raw(closingFence);
+ if (indent > 0) {
+ writer.popPrefix();
+ }
+ writer.block();
+ }
+
+ @Override
+ public void visit(HtmlBlock htmlBlock) {
+ List<String> lines = getLines(htmlBlock.getLiteral());
+ for (int i = 0; i < lines.size(); i++) {
+ String line = lines.get(i);
+ writer.raw(line);
+ if (i != lines.size() - 1) {
+ writer.line();
+ }
+ }
+ writer.block();
+ }
+
+ @Override
+ public void visit(Paragraph paragraph) {
+ visitChildren(paragraph);
+ writer.block();
+ }
+
+ @Override
+ public void visit(BlockQuote blockQuote) {
+ writer.writePrefix("> ");
+ writer.pushPrefix("> ");
+ visitChildren(blockQuote);
+ writer.popPrefix();
+ writer.block();
+ }
+
+ @Override
+ public void visit(BulletList bulletList) {
+ writer.pushTight(bulletList.isTight());
+ listHolder = new BulletListHolder(listHolder, bulletList);
+ visitChildren(bulletList);
+ listHolder = listHolder.parent;
+ writer.popTight();
+ writer.block();
+ }
+
+ @Override
+ public void visit(OrderedList orderedList) {
+ writer.pushTight(orderedList.isTight());
+ listHolder = new OrderedListHolder(listHolder, orderedList);
+ visitChildren(orderedList);
+ listHolder = listHolder.parent;
+ writer.popTight();
+ writer.block();
+ }
+
+ @Override
+ public void visit(ListItem listItem) {
+ int markerIndent = listItem.getMarkerIndent() != null ? listItem.getMarkerIndent() : 0;
+ String marker;
+ if (listHolder instanceof BulletListHolder) {
+ BulletListHolder bulletListHolder = (BulletListHolder) listHolder;
+ marker = repeat(" ", markerIndent) + bulletListHolder.marker;
+ } else if (listHolder instanceof OrderedListHolder) {
+ OrderedListHolder orderedListHolder = (OrderedListHolder) listHolder;
+ marker = repeat(" ", markerIndent) + orderedListHolder.number + orderedListHolder.delimiter;
+ orderedListHolder.number++;
+ } else {
+ throw new IllegalStateException("Unknown list holder type: " + listHolder);
+ }
+ Integer contentIndent = listItem.getContentIndent();
+ String spaces = contentIndent != null ? repeat(" ", Math.max(contentIndent - marker.length(), 1)) : " ";
+ writer.writePrefix(marker);
+ writer.writePrefix(spaces);
+ writer.pushPrefix(repeat(" ", marker.length() + spaces.length()));
+
+ if (listItem.getFirstChild() == null) {
+ // Empty list item
+ writer.block();
+ } else {
+ visitChildren(listItem);
+ }
+
+ writer.popPrefix();
+ }
+
+ @Override
+ public void visit(Code code) {
+ String literal = code.getLiteral();
+ // If the literal includes backticks, we can surround them by using one more backtick.
+ int backticks = findMaxRunLength("`", literal);
+ for (int i = 0; i < backticks + 1; i++) {
+ writer.raw('`');
+ }
+ // If the literal starts or ends with a backtick, surround it with a single space.
+ // If it starts and ends with a space (but is not only spaces), add an additional space (otherwise they would
+ // get removed on parsing).
+ boolean addSpace = literal.startsWith("`") || literal.endsWith("`") ||
+ (literal.startsWith(" ") && literal.endsWith(" ") && Characters.hasNonSpace(literal));
+ if (addSpace) {
+ writer.raw(' ');
+ }
+ writer.raw(literal);
+ if (addSpace) {
+ writer.raw(' ');
+ }
+ for (int i = 0; i < backticks + 1; i++) {
+ writer.raw('`');
+ }
+ }
+
+ @Override
+ public void visit(Emphasis emphasis) {
+ String delimiter = emphasis.getOpeningDelimiter();
+ // Use delimiter that was parsed if available
+ if (delimiter == null) {
+ // When emphasis is nested, a different delimiter needs to be used
+ delimiter = writer.getLastChar() == '*' ? "_" : "*";
+ }
+ writer.raw(delimiter);
+ super.visit(emphasis);
+ writer.raw(delimiter);
+ }
+
+ @Override
+ public void visit(StrongEmphasis strongEmphasis) {
+ writer.raw("**");
+ super.visit(strongEmphasis);
+ writer.raw("**");
+ }
+
+ @Override
+ public void visit(Link link) {
+ writeLinkLike(link.getTitle(), link.getDestination(), link, "[");
+ }
+
+ @Override
+ public void visit(Image image) {
+ writeLinkLike(image.getTitle(), image.getDestination(), image, "![");
+ }
+
+ @Override
+ public void visit(HtmlInline htmlInline) {
+ writer.raw(htmlInline.getLiteral());
+ }
+
+ @Override
+ public void visit(HardLineBreak hardLineBreak) {
+ writer.raw(" ");
+ writer.line();
+ }
+
+ @Override
+ public void visit(SoftLineBreak softLineBreak) {
+ writer.line();
+ }
+
+ @Override
+ public void visit(Text text) {
+ // Text is tricky. In Markdown special characters (`-`, `#` etc.) can be escaped (`\-`, `\#` etc.) so that
+ // they're parsed as plain text. Currently, whether a character was escaped or not is not recorded in the Node,
+ // so here we don't know. If we just wrote out those characters unescaped, the resulting Markdown would change
+ // meaning (turn into a list item, heading, etc.).
+ // You might say "Why not store that in the Node when parsing", but that wouldn't work for the use case where
+ // nodes are constructed directly instead of via parsing. This renderer needs to work for that too.
+ // So currently, when in doubt, we escape. For special characters only occurring at the beginning of a line,
+ // we only escape them then (we wouldn't want to escape every `.` for example).
+ String literal = text.getLiteral();
+ if (writer.isAtLineStart() && !literal.isEmpty()) {
+ char c = literal.charAt(0);
+ switch (c) {
+ case '-': {
+ // Would be ambiguous with a bullet list marker, escape
+ writer.raw("\\-");
+ literal = literal.substring(1);
+ break;
+ }
+ case '#': {
+ // Would be ambiguous with an ATX heading, escape
+ writer.raw("\\#");
+ literal = literal.substring(1);
+ break;
+ }
+ case '=': {
+ // Would be ambiguous with a Setext heading, escape unless it's the first line in the block
+ if (text.getPrevious() != null) {
+ writer.raw("\\=");
+ literal = literal.substring(1);
+ }
+ break;
+ }
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ // Check for ordered list marker
+ Matcher m = orderedListMarkerPattern.matcher(literal);
+ if (m.find()) {
+ writer.raw(m.group(1));
+ writer.raw("\\" + m.group(2));
+ literal = literal.substring(m.end());
+ }
+ break;
+ }
+ case '\t': {
+ writer.raw(" ");
+ literal = literal.substring(1);
+ break;
+ }
+ case ' ': {
+ writer.raw(" ");
+ literal = literal.substring(1);
+ break;
+ }
+ }
+ }
+
+ CharMatcher escape = text.getParent() instanceof Heading ? textEscapeInHeading : textEscape;
+
+ if (literal.endsWith("!") && text.getNext() instanceof Link) {
+ // If we wrote the `!` unescaped, it would turn the link into an image instead.
+ writer.text(literal.substring(0, literal.length() - 1), escape);
+ writer.raw("\\!");
+ } else {
+ writer.text(literal, escape);
+ }
+ }
+
+ @Override
+ protected void visitChildren(Node parent) {
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext();
+ context.render(node);
+ node = next;
+ }
+ }
+
+ private static int findMaxRunLength(String needle, String s) {
+ int maxRunLength = 0;
+ int pos = 0;
+ while (pos < s.length()) {
+ pos = s.indexOf(needle, pos);
+ if (pos == -1) {
+ return maxRunLength;
+ }
+ int runLength = 0;
+ do {
+ pos += needle.length();
+ runLength++;
+ } while (s.startsWith(needle, pos));
+ maxRunLength = Math.max(runLength, maxRunLength);
+ }
+ return maxRunLength;
+ }
+
+ private static boolean contains(String s, CharMatcher charMatcher) {
+ for (int i = 0; i < s.length(); i++) {
+ if (charMatcher.matches(s.charAt(i))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Keep for Android compat (String.repeat only available on Android 12 and later)
+ private static String repeat(String s, int count) {
+ StringBuilder sb = new StringBuilder(s.length() * count);
+ for (int i = 0; i < count; i++) {
+ sb.append(s);
+ }
+ return sb.toString();
+ }
+
+ private static List<String> getLines(String literal) {
+ // Without -1, split would discard all trailing empty strings, which is not what we want, e.g. it would
+ // return the same result for "abc", "abc\n" and "abc\n\n".
+ // With -1, it returns ["abc"], ["abc", ""] and ["abc", "", ""].
+ String[] parts = literal.split("\n", -1);
+ if (parts[parts.length - 1].isEmpty()) {
+ // But we don't want the last empty string, as "\n" is used as a line terminator (not a separator),
+ // so return without the last element.
+ return List.of(parts).subList(0, parts.length - 1);
+ } else {
+ return List.of(parts);
+ }
+ }
+
+ private void writeLinkLike(String title, String destination, Node node, String opener) {
+ writer.raw(opener);
+ visitChildren(node);
+ writer.raw(']');
+ writer.raw('(');
+ if (contains(destination, linkDestinationNeedsAngleBrackets)) {
+ writer.raw('<');
+ writer.text(destination, linkDestinationEscapeInAngleBrackets);
+ writer.raw('>');
+ } else {
+ writer.raw(destination);
+ }
+ if (title != null) {
+ writer.raw(' ');
+ writer.raw('"');
+ writer.text(title, linkTitleEscapeInQuotes);
+ writer.raw('"');
+ }
+ writer.raw(')');
+ }
+
+ private static class ListHolder {
+ final ListHolder parent;
+
+ protected ListHolder(ListHolder parent) {
+ this.parent = parent;
+ }
+ }
+
+ private static class BulletListHolder extends ListHolder {
+ final String marker;
+
+ public BulletListHolder(ListHolder parent, BulletList bulletList) {
+ super(parent);
+ this.marker = bulletList.getMarker() != null ? bulletList.getMarker() : "-";
+ }
+ }
+
+ private static class OrderedListHolder extends ListHolder {
+ final String delimiter;
+ private int number;
+
+ protected OrderedListHolder(ListHolder parent, OrderedList orderedList) {
+ super(parent);
+ delimiter = orderedList.getMarkerDelimiter() != null ? orderedList.getMarkerDelimiter() : ".";
+ number = orderedList.getMarkerStartNumber() != null ? orderedList.getMarkerStartNumber() : 1;
+ }
+ }
+
+ /**
+ * Visits nodes to check if there are any soft or hard line breaks.
+ */
+ private static class LineBreakVisitor extends AbstractVisitor {
+ private boolean lineBreak = false;
+
+ public boolean hasLineBreak() {
+ return lineBreak;
+ }
+
+ @Override
+ public void visit(SoftLineBreak softLineBreak) {
+ super.visit(softLineBreak);
+ lineBreak = true;
+ }
+
+ @Override
+ public void visit(HardLineBreak hardLineBreak) {
+ super.visit(hardLineBreak);
+ lineBreak = true;
+ }
+ }
+}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java
new file mode 100644
index 000000000..40640d1b4
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java
@@ -0,0 +1,30 @@
+package org.commonmark.renderer.markdown;
+
+import org.commonmark.node.Node;
+
+import java.util.Set;
+
+/**
+ * Context that is passed to custom node renderers, see {@link MarkdownNodeRendererFactory#create}.
+ */
+public interface MarkdownNodeRendererContext {
+
+ /**
+ * @return the writer to use
+ */
+ MarkdownWriter getWriter();
+
+ /**
+ * Render the specified node and its children using the configured renderers. This should be used to render child
+ * nodes; be careful not to pass the node that is being rendered, that would result in an endless loop.
+ *
+ * @param node the node to render
+ */
+ void render(Node node);
+
+ /**
+ * @return additional special characters that need to be escaped if they occur in normal text; currently only ASCII
+ * characters are allowed
+ */
+ Set<Character> getSpecialCharacters();
+}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java
new file mode 100644
index 000000000..14221ea56
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java
@@ -0,0 +1,25 @@
+package org.commonmark.renderer.markdown;
+
+import org.commonmark.renderer.NodeRenderer;
+
+import java.util.Set;
+
+/**
+ * Factory for instantiating new node renderers for rendering custom nodes.
+ */
+public interface MarkdownNodeRendererFactory {
+
+ /**
+ * Create a new node renderer for the specified rendering context.
+ *
+ * @param context the context for rendering (normally passed on to the node renderer)
+ * @return a node renderer
+ */
+ NodeRenderer create(MarkdownNodeRendererContext context);
+
+ /**
+ * @return the additional special characters that this factory would like to have escaped in normal text; currently
+ * only ASCII characters are allowed
+ */
+ Set<Character> getSpecialCharacters();
+}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java
new file mode 100644
index 000000000..e4996fb08
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java
@@ -0,0 +1,161 @@
+package org.commonmark.renderer.markdown;
+
+import org.commonmark.Extension;
+import org.commonmark.internal.renderer.NodeRendererMap;
+import org.commonmark.node.Node;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.Renderer;
+
+import java.util.*;
+
+/**
+ * Renders nodes to Markdown (CommonMark syntax); use {@link #builder()} to create a renderer.
+ * <p>
+ * Note that it doesn't currently preserve the exact syntax of the original input Markdown (if any):
+ * <ul>
+ * <li>Headings are output as ATX headings if possible (multi-line headings need Setext headings)</li>
+ * <li>Links are always rendered as inline links (no support for reference links yet)</li>
+ * <li>Escaping might be over-eager, e.g. a plain {@code *} might be escaped
+ * even though it doesn't need to be in that particular context</li>
+ * <li>Leading whitespace in paragraphs is not preserved</li>
+ * </ul>
+ * However, it should produce Markdown that is semantically equivalent to the input, i.e. if the Markdown was parsed
+ * again and compared against the original AST, it should be the same (minus bugs).
+ */
+public class MarkdownRenderer implements Renderer {
+
+ private final List<MarkdownNodeRendererFactory> nodeRendererFactories;
+
+ private MarkdownRenderer(Builder builder) {
+ this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1);
+ this.nodeRendererFactories.addAll(builder.nodeRendererFactories);
+ // Add as last. This means clients can override the rendering of core nodes if they want.
+ this.nodeRendererFactories.add(new MarkdownNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(MarkdownNodeRendererContext context) {
+ return new CoreMarkdownNodeRenderer(context);
+ }
+
+ @Override
+ public Set<Character> getSpecialCharacters() {
+ return Set.of();
+ }
+ });
+ }
+
+ /**
+ * Create a new builder for configuring a {@link MarkdownRenderer}.
+ *
+ * @return a builder
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ @Override
+ public void render(Node node, Appendable output) {
+ RendererContext context = new RendererContext(new MarkdownWriter(output));
+ context.render(node);
+ }
+
+ @Override
+ public String render(Node node) {
+ StringBuilder sb = new StringBuilder();
+ render(node, sb);
+ return sb.toString();
+ }
+
+ /**
+ * Builder for configuring a {@link MarkdownRenderer}. See methods for default configuration.
+ */
+ public static class Builder {
+
+ private final List<MarkdownNodeRendererFactory> nodeRendererFactories = new ArrayList<>();
+
+ /**
+ * @return the configured {@link MarkdownRenderer}
+ */
+ public MarkdownRenderer build() {
+ return new MarkdownRenderer(this);
+ }
+
+ /**
+ * Add a factory for instantiating a node renderer (done when rendering). This allows to override the rendering
+ * of node types or define rendering for custom node types.
+ * <p>
+ * If multiple node renderers for the same node type are created, the one from the factory that was added first
+ * "wins". (This is how the rendering for core node types can be overridden; the default rendering comes last.)
+ *
+ * @param nodeRendererFactory the factory for creating a node renderer
+ * @return {@code this}
+ */
+ public Builder nodeRendererFactory(MarkdownNodeRendererFactory nodeRendererFactory) {
+ this.nodeRendererFactories.add(nodeRendererFactory);
+ return this;
+ }
+
+ /**
+ * @param extensions extensions to use on this renderer
+ * @return {@code this}
+ */
+ public Builder extensions(Iterable<? extends Extension> extensions) {
+ for (Extension extension : extensions) {
+ if (extension instanceof MarkdownRendererExtension) {
+ MarkdownRendererExtension markdownRendererExtension = (MarkdownRendererExtension) extension;
+ markdownRendererExtension.extend(this);
+ }
+ }
+ return this;
+ }
+ }
+
+ /**
+ * Extension for {@link MarkdownRenderer} for rendering custom nodes.
+ */
+ public interface MarkdownRendererExtension extends Extension {
+
+ /**
+ * Extend Markdown rendering, usually by registering custom node renderers using {@link Builder#nodeRendererFactory}.
+ *
+ * @param rendererBuilder the renderer builder to extend
+ */
+ void extend(Builder rendererBuilder);
+ }
+
+ private class RendererContext implements MarkdownNodeRendererContext {
+ private final MarkdownWriter writer;
+ private final NodeRendererMap nodeRendererMap = new NodeRendererMap();
+ private final Set<Character> additionalTextEscapes;
+
+ private RendererContext(MarkdownWriter writer) {
+ // Set fields that are used by interface
+ this.writer = writer;
+ Set<Character> escapes = new HashSet<>();
+ for (MarkdownNodeRendererFactory factory : nodeRendererFactories) {
+ escapes.addAll(factory.getSpecialCharacters());
+ }
+ additionalTextEscapes = Collections.unmodifiableSet(escapes);
+
+ for (var factory : nodeRendererFactories) {
+ // Pass in this as context here, which uses the fields set above
+ var renderer = factory.create(this);
+ nodeRendererMap.add(renderer);
+ }
+ }
+
+ @Override
+ public MarkdownWriter getWriter() {
+ return writer;
+ }
+
+ @Override
+ public void render(Node node) {
+ nodeRendererMap.render(node);
+ }
+
+ @Override
+ public Set<Character> getSpecialCharacters() {
+ return additionalTextEscapes;
+ }
+ }
+}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java
new file mode 100644
index 000000000..c9f427021
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java
@@ -0,0 +1,246 @@
+package org.commonmark.renderer.markdown;
+
+import org.commonmark.text.CharMatcher;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * Writer for Markdown (CommonMark) text.
+ */
+public class MarkdownWriter {
+
+ private final Appendable buffer;
+
+ private int blockSeparator = 0;
+ private char lastChar;
+ private boolean atLineStart = true;
+
+ // Stacks of settings that affect various rendering behaviors. The common pattern here is that callers use "push" to
+ // change a setting, render some nodes, and then "pop" the setting off the stack again to restore previous state.
+ private final LinkedList<String> prefixes = new LinkedList<>();
+ private final LinkedList<Boolean> tight = new LinkedList<>();
+ private final LinkedList<CharMatcher> rawEscapes = new LinkedList<>();
+
+ public MarkdownWriter(Appendable out) {
+ buffer = out;
+ }
+
+ /**
+ * Write the supplied string (raw/unescaped except if {@link #pushRawEscape} was used).
+ */
+ public void raw(String s) {
+ flushBlockSeparator();
+ write(s, null);
+ }
+
+ /**
+ * Write the supplied character (raw/unescaped except if {@link #pushRawEscape} was used).
+ */
+ public void raw(char c) {
+ flushBlockSeparator();
+ write(c);
+ }
+
+ /**
+ * Write the supplied string with escaping.
+ *
+ * @param s the string to write
+ * @param escape which characters to escape
+ */
+ public void text(String s, CharMatcher escape) {
+ if (s.isEmpty()) {
+ return;
+ }
+ flushBlockSeparator();
+ write(s, escape);
+
+ lastChar = s.charAt(s.length() - 1);
+ atLineStart = false;
+ }
+
+ /**
+ * Write a newline (line terminator).
+ */
+ public void line() {
+ write('\n');
+ writePrefixes();
+ atLineStart = true;
+ }
+
+ /**
+ * Enqueue a block separator to be written before the next text is written. Block separators are not written
+ * straight away because if there are no more blocks to write we don't want a separator (at the end of the document).
+ */
+ public void block() {
+ // Remember whether this should be a tight or loose separator now because tight could get changed in between
+ // this and the next flush.
+ blockSeparator = isTight() ? 1 : 2;
+ atLineStart = true;
+ }
+
+ /**
+ * Push a prefix onto the top of the stack. All prefixes are written at the beginning of each line, until the
+ * prefix is popped again.
+ *
+ * @param prefix the raw prefix string
+ */
+ public void pushPrefix(String prefix) {
+ prefixes.addLast(prefix);
+ }
+
+ /**
+ * Write a prefix.
+ *
+ * @param prefix the raw prefix string to write
+ */
+ public void writePrefix(String prefix) {
+ boolean tmp = atLineStart;
+ raw(prefix);
+ atLineStart = tmp;
+ }
+
+ /**
+ * Remove the last prefix from the top of the stack.
+ */
+ public void popPrefix() {
+ prefixes.removeLast();
+ }
+
+ /**
+ * Change whether blocks are tight or loose. Loose is the default where blocks are separated by a blank line. Tight
+ * is where blocks are not separated by a blank line. Tight blocks are used in lists, if there are no blank lines
+ * within the list.
+ *
+ * Note that changing this does not affect block separators that have already been enqueued with {@link #block()},
+ * only future ones.
+ */
+ public void pushTight(boolean tight) {
+ this.tight.addLast(tight);
+ }
+
+ /**
+ * Remove the last "tight" setting from the top of the stack.
+ */
+ public void popTight() {
+ this.tight.removeLast();
+ }
+
+ /**
+ * Escape the characters matching the supplied matcher, in all text (text and raw). This might be useful to
+ * extensions that add another layer of syntax, e.g. the tables extension that uses `|` to separate cells and needs
+ * all `|` characters to be escaped (even in code spans).
+ *
+ * @param rawEscape the characters to escape in raw text
+ */
+ public void pushRawEscape(CharMatcher rawEscape) {
+ rawEscapes.add(rawEscape);
+ }
+
+ /**
+ * Remove the last raw escape from the top of the stack.
+ */
+ public void popRawEscape() {
+ rawEscapes.removeLast();
+ }
+
+ /**
+ * @return the last character that was written
+ */
+ public char getLastChar() {
+ return lastChar;
+ }
+
+ /**
+ * @return whether we're at the line start (not counting any prefixes), i.e. after a {@link #line} or {@link #block}.
+ */
+ public boolean isAtLineStart() {
+ return atLineStart;
+ }
+
+ private void write(String s, CharMatcher escape) {
+ try {
+ if (rawEscapes.isEmpty() && escape == null) {
+ // Normal fast path
+ buffer.append(s);
+ } else {
+ for (int i = 0; i < s.length(); i++) {
+ append(s.charAt(i), escape);
+ }
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ int length = s.length();
+ if (length != 0) {
+ lastChar = s.charAt(length - 1);
+ }
+ atLineStart = false;
+ }
+
+ private void write(char c) {
+ try {
+ append(c, null);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ lastChar = c;
+ atLineStart = false;
+ }
+
+ private void writePrefixes() {
+ if (!prefixes.isEmpty()) {
+ for (String prefix : prefixes) {
+ write(prefix, null);
+ }
+ }
+ }
+
+ /**
+ * If a block separator has been enqueued with {@link #block()} but not yet written, write it now.
+ */
+ private void flushBlockSeparator() {
+ if (blockSeparator != 0) {
+ write('\n');
+ writePrefixes();
+ if (blockSeparator > 1) {
+ write('\n');
+ writePrefixes();
+ }
+ blockSeparator = 0;
+ }
+ }
+
+ private void append(char c, CharMatcher escape) throws IOException {
+ if (needsEscaping(c, escape)) {
+ if (c == '\n') {
+ // Can't escape this with \, use numeric character reference
+ buffer.append("&#10;");
+ } else {
+ buffer.append('\\');
+ buffer.append(c);
+ }
+ } else {
+ buffer.append(c);
+ }
+ }
+
+ private boolean isTight() {
+ return !tight.isEmpty() && tight.getLast();
+ }
+
+ private boolean needsEscaping(char c, CharMatcher escape) {
+ return (escape != null && escape.matches(c)) || rawNeedsEscaping(c);
+ }
+
+ private boolean rawNeedsEscaping(char c) {
+ for (CharMatcher rawEscape : rawEscapes) {
+ if (rawEscape.matches(c)) {
+ return true;
+ }
+ }
+ return false;
+ }
+}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java
new file mode 100644
index 000000000..f707671d5
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Markdown rendering (see {@link org.commonmark.renderer.markdown.MarkdownRenderer})
+ */
+package org.commonmark.renderer.markdown;
diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java
index a5f9db518..ee564cbdb 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/text/CoreTextContentNodeRenderer.java
@@ -2,12 +2,7 @@
import org.commonmark.node.*;
import org.commonmark.renderer.NodeRenderer;
-import org.commonmark.internal.renderer.text.BulletListHolder;
-import org.commonmark.internal.renderer.text.ListHolder;
-import org.commonmark.internal.renderer.text.OrderedListHolder;
-import java.util.Arrays;
-import java.util.HashSet;
import java.util.Set;
/**
@@ -27,7 +22,7 @@ public CoreTextContentNodeRenderer(TextContentNodeRendererContext context) {
@Override
public Set<Class<? extends Node>> getNodeTypes() {
- return new HashSet<>(Arrays.asList(
+ return Set.of(
Document.class,
Heading.class,
Paragraph.class,
@@ -48,7 +43,7 @@ public Set<Class<? extends Node>> getNodeTypes() {
HtmlInline.class,
SoftLineBreak.class,
HardLineBreak.class
- ));
+ );
}
@Override
@@ -64,26 +59,24 @@ public void visit(Document document) {
@Override
public void visit(BlockQuote blockQuote) {
- textContent.write('«');
+ // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ textContent.write('\u00AB');
visitChildren(blockQuote);
- textContent.write('»');
+ textContent.resetBlock();
+ // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ textContent.write('\u00BB');
- writeEndOfLineIfNeeded(blockQuote, null);
+ textContent.block();
}
@Override
public void visit(BulletList bulletList) {
- if (listHolder != null) {
- writeEndOfLine();
- }
+ textContent.pushTight(bulletList.isTight());
listHolder = new BulletListHolder(listHolder, bulletList);
visitChildren(bulletList);
- writeEndOfLineIfNeeded(bulletList, null);
- if (listHolder.getParent() != null) {
- listHolder = listHolder.getParent();
- } else {
- listHolder = null;
- }
+ textContent.popTight();
+ textContent.block();
+ listHolder = listHolder.getParent();
}
@Override
@@ -95,31 +88,40 @@ public void visit(Code code) {
@Override
public void visit(FencedCodeBlock fencedCodeBlock) {
- if (context.stripNewlines()) {
- textContent.writeStripped(fencedCodeBlock.getLiteral());
- writeEndOfLineIfNeeded(fencedCodeBlock, null);
+ var literal = stripTrailingNewline(fencedCodeBlock.getLiteral());
+ if (stripNewlines()) {
+ textContent.writeStripped(literal);
} else {
- textContent.write(fencedCodeBlock.getLiteral());
+ textContent.write(literal);
}
+ textContent.block();
}
@Override
public void visit(HardLineBreak hardLineBreak) {
- writeEndOfLineIfNeeded(hardLineBreak, null);
+ if (stripNewlines()) {
+ textContent.whitespace();
+ } else {
+ textContent.line();
+ }
}
@Override
public void visit(Heading heading) {
visitChildren(heading);
- writeEndOfLineIfNeeded(heading, ':');
+ if (stripNewlines()) {
+ textContent.write(": ");
+ } else {
+ textContent.block();
+ }
}
@Override
public void visit(ThematicBreak thematicBreak) {
- if (!context.stripNewlines()) {
+ if (!stripNewlines()) {
textContent.write("***");
}
- writeEndOfLineIfNeeded(thematicBreak, null);
+ textContent.block();
}
@Override
@@ -139,12 +141,13 @@ public void visit(Image image) {
@Override
public void visit(IndentedCodeBlock indentedCodeBlock) {
- if (context.stripNewlines()) {
- textContent.writeStripped(indentedCodeBlock.getLiteral());
- writeEndOfLineIfNeeded(indentedCodeBlock, null);
+ var literal = stripTrailingNewline(indentedCodeBlock.getLiteral());
+ if (stripNewlines()) {
+ textContent.writeStripped(literal);
} else {
- textContent.write(indentedCodeBlock.getLiteral());
+ textContent.write(literal);
}
+ textContent.block();
}
@Override
@@ -155,49 +158,56 @@ public void visit(Link link) {
@Override
public void visit(ListItem listItem) {
if (listHolder != null && listHolder instanceof OrderedListHolder) {
- OrderedListHolder orderedListHolder = (OrderedListHolder) listHolder;
- String indent = context.stripNewlines() ? "" : orderedListHolder.getIndent();
- textContent.write(indent + orderedListHolder.getCounter() + orderedListHolder.getDelimiter() + " ");
+ var orderedListHolder = (OrderedListHolder) listHolder;
+ var marker = orderedListHolder.getCounter() + orderedListHolder.getDelimiter();
+ var spaces = " ";
+ textContent.write(marker);
+ textContent.write(spaces);
+ textContent.pushPrefix(repeat(" ", marker.length() + spaces.length()));
visitChildren(listItem);
- writeEndOfLineIfNeeded(listItem, null);
+ textContent.block();
+ textContent.popPrefix();
orderedListHolder.increaseCounter();
} else if (listHolder != null && listHolder instanceof BulletListHolder) {
BulletListHolder bulletListHolder = (BulletListHolder) listHolder;
- if (!context.stripNewlines()) {
- textContent.write(bulletListHolder.getIndent() + bulletListHolder.getMarker() + " ");
+ if (!stripNewlines()) {
+ var marker = bulletListHolder.getMarker();
+ var spaces = " ";
+ textContent.write(marker);
+ textContent.write(spaces);
+ textContent.pushPrefix(repeat(" ", marker.length() + spaces.length()));
}
visitChildren(listItem);
- writeEndOfLineIfNeeded(listItem, null);
+ textContent.block();
+ if (!stripNewlines()) {
+ textContent.popPrefix();
+ }
}
}
@Override
public void visit(OrderedList orderedList) {
- if (listHolder != null) {
- writeEndOfLine();
- }
+ textContent.pushTight(orderedList.isTight());
listHolder = new OrderedListHolder(listHolder, orderedList);
visitChildren(orderedList);
- writeEndOfLineIfNeeded(orderedList, null);
- if (listHolder.getParent() != null) {
- listHolder = listHolder.getParent();
- } else {
- listHolder = null;
- }
+ textContent.popTight();
+ textContent.block();
+ listHolder = listHolder.getParent();
}
@Override
public void visit(Paragraph paragraph) {
visitChildren(paragraph);
- // Add "end of line" only if its "root paragraph.
- if (paragraph.getParent() == null || paragraph.getParent() instanceof Document) {
- writeEndOfLineIfNeeded(paragraph, null);
- }
+ textContent.block();
}
@Override
public void visit(SoftLineBreak softLineBreak) {
- writeEndOfLineIfNeeded(softLineBreak, null);
+ if (stripNewlines()) {
+ textContent.whitespace();
+ } else {
+ textContent.line();
+ }
}
@Override
@@ -216,7 +226,7 @@ protected void visitChildren(Node parent) {
}
private void writeText(String text) {
- if (context.stripNewlines()) {
+ if (stripNewlines()) {
textContent.writeStripped(text);
} else {
textContent.write(text);
@@ -255,26 +265,72 @@ private void writeLink(Node node, String title, String destination) {
}
}
- private void writeEndOfLineIfNeeded(Node node, Character c) {
- if (context.stripNewlines()) {
- if (c != null) {
- textContent.write(c);
- }
- if (node.getNext() != null) {
- textContent.whitespace();
- }
+ private boolean stripNewlines() {
+ return context.lineBreakRendering() == LineBreakRendering.STRIP;
+ }
+
+ private static String stripTrailingNewline(String s) {
+ if (s.endsWith("\n")) {
+ return s.substring(0, s.length() - 1);
} else {
- if (node.getNext() != null) {
- textContent.line();
- }
+ return s;
}
}
- private void writeEndOfLine() {
- if (context.stripNewlines()) {
- textContent.whitespace();
- } else {
- textContent.line();
+ // Keep for Android compat (String.repeat only available on Android 12 and later)
+ private static String repeat(String s, int count) {
+ var sb = new StringBuilder(s.length() * count);
+ for (int i = 0; i < count; i++) {
+ sb.append(s);
+ }
+ return sb.toString();
+ }
+
+ private static class BulletListHolder extends ListHolder {
+ private final String marker;
+
+ public BulletListHolder(ListHolder parent, BulletList list) {
+ super(parent);
+ marker = list.getMarker();
+ }
+
+ public String getMarker() {
+ return marker;
+ }
+ }
+
+ private abstract static class ListHolder {
+ private final ListHolder parent;
+
+ ListHolder(ListHolder parent) {
+ this.parent = parent;
+ }
+
+ public ListHolder getParent() {
+ return parent;
+ }
+ }
+
+ private static class OrderedListHolder extends ListHolder {
+ private final String delimiter;
+ private int counter;
+
+ public OrderedListHolder(ListHolder parent, OrderedList list) {
+ super(parent);
+ delimiter = list.getMarkerDelimiter() != null ? list.getMarkerDelimiter() : ".";
+ counter = list.getMarkerStartNumber() != null ? list.getMarkerStartNumber() : 1;
+ }
+
+ public String getDelimiter() {
+ return delimiter;
+ }
+
+ public int getCounter() {
+ return counter;
+ }
+
+ public void increaseCounter() {
+ counter++;
}
}
}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/LineBreakRendering.java b/commonmark/src/main/java/org/commonmark/renderer/text/LineBreakRendering.java
new file mode 100644
index 000000000..27eeaf0da
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/renderer/text/LineBreakRendering.java
@@ -0,0 +1,19 @@
+package org.commonmark.renderer.text;
+
+/**
+ * Control how line breaks are rendered.
+ */
+public enum LineBreakRendering {
+ /**
+ * Strip all line breaks within blocks and between blocks, resulting in all the text in a single line.
+ */
+ STRIP,
+ /**
+ * Use single line breaks between blocks, not a blank line (also render all lists as tight).
+ */
+ COMPACT,
+ /**
+ * Separate blocks by a blank line (and respect tight vs loose lists).
+ */
+ SEPARATE_BLOCKS,
+}
diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java
index 1b1cf327c..d6fcb8d77 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentNodeRendererContext.java
@@ -4,10 +4,17 @@
public interface TextContentNodeRendererContext {
+ /**
+ * Controls how line breaks should be rendered, see {@link LineBreakRendering}.
+ */
+ LineBreakRendering lineBreakRendering();
+
/**
* @return true for stripping new lines and render text as "single line",
* false for keeping all line breaks.
+ * @deprecated Use {@link #lineBreakRendering()} instead
*/
+ @Deprecated
boolean stripNewlines();
/**
diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java
index aacfbb82a..d64d0c7ef 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentRenderer.java
@@ -9,14 +9,17 @@
import java.util.ArrayList;
import java.util.List;
+/**
+ * Renders nodes to plain text content with minimal markup-like additions.
+ */
public class TextContentRenderer implements Renderer {
- private final boolean stripNewlines;
+ private final LineBreakRendering lineBreakRendering;
private final List<TextContentNodeRendererFactory> nodeRendererFactories;
private TextContentRenderer(Builder builder) {
- this.stripNewlines = builder.stripNewlines;
+ this.lineBreakRendering = builder.lineBreakRendering;
this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1);
this.nodeRendererFactories.addAll(builder.nodeRendererFactories);
@@ -30,7 +33,7 @@ public NodeRenderer create(TextContentNodeRendererContext context) {
}
/**
- * Create a new builder for configuring an {@link TextContentRenderer}.
+ * Create a new builder for configuring a {@link TextContentRenderer}.
*
* @return a builder
*/
@@ -40,7 +43,7 @@ public static Builder builder() {
@Override
public void render(Node node, Appendable output) {
- RendererContext context = new RendererContext(new TextContentWriter(output));
+ RendererContext context = new RendererContext(new TextContentWriter(output, lineBreakRendering));
context.render(node);
}
@@ -52,12 +55,12 @@ public String render(Node node) {
}
/**
- * Builder for configuring an {@link TextContentRenderer}. See methods for default configuration.
+ * Builder for configuring a {@link TextContentRenderer}. See methods for default configuration.
*/
public static class Builder {
- private boolean stripNewlines = false;
private List<TextContentNodeRendererFactory> nodeRendererFactories = new ArrayList<>();
+ private LineBreakRendering lineBreakRendering = LineBreakRendering.COMPACT;
/**
* @return the configured {@link TextContentRenderer}
@@ -66,15 +69,29 @@ public TextContentRenderer build() {
return new TextContentRenderer(this);
}
+ /**
+ * Configure how line breaks (newlines) are rendered, see {@link LineBreakRendering}.
+ * The default is {@link LineBreakRendering#COMPACT}.
+ *
+ * @param lineBreakRendering the mode to use
+ * @return {@code this}
+ */
+ public Builder lineBreakRendering(LineBreakRendering lineBreakRendering) {
+ this.lineBreakRendering = lineBreakRendering;
+ return this;
+ }
+
/**
* Set the value of flag for stripping new lines.
*
* @param stripNewlines true for stripping new lines and render text as "single line",
* false for keeping all line breaks
* @return {@code this}
+ * @deprecated Use {@link #lineBreakRendering(LineBreakRendering)} with {@link LineBreakRendering#STRIP} instead
*/
+ @Deprecated
public Builder stripNewlines(boolean stripNewlines) {
- this.stripNewlines = stripNewlines;
+ this.lineBreakRendering = stripNewlines ? LineBreakRendering.STRIP : LineBreakRendering.COMPACT;
return this;
}
@@ -123,17 +140,20 @@ private class RendererContext implements TextContentNodeRendererContext {
private RendererContext(TextContentWriter textContentWriter) {
this.textContentWriter = textContentWriter;
- // The first node renderer for a node type "wins".
- for (int i = nodeRendererFactories.size() - 1; i >= 0; i--) {
- TextContentNodeRendererFactory nodeRendererFactory = nodeRendererFactories.get(i);
- NodeRenderer nodeRenderer = nodeRendererFactory.create(this);
- nodeRendererMap.add(nodeRenderer);
+ for (var factory : nodeRendererFactories) {
+ var renderer = factory.create(this);
+ nodeRendererMap.add(renderer);
}
}
+ @Override
+ public LineBreakRendering lineBreakRendering() {
+ return lineBreakRendering;
+ }
+
@Override
public boolean stripNewlines() {
- return stripNewlines;
+ return lineBreakRendering == LineBreakRendering.STRIP;
}
@Override
diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java
index 0ea56e621..1fb482785 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/text/TextContentWriter.java
@@ -1,47 +1,141 @@
package org.commonmark.renderer.text;
import java.io.IOException;
+import java.util.LinkedList;
public class TextContentWriter {
private final Appendable buffer;
+ private final LineBreakRendering lineBreakRendering;
+ private final LinkedList<String> prefixes = new LinkedList<>();
+ private final LinkedList<Boolean> tight = new LinkedList<>();
+
+ private String blockSeparator = null;
private char lastChar;
public TextContentWriter(Appendable out) {
- buffer = out;
+ this(out, LineBreakRendering.COMPACT);
+ }
+
+ public TextContentWriter(Appendable out, LineBreakRendering lineBreakRendering) {
+ this.buffer = out;
+ this.lineBreakRendering = lineBreakRendering;
}
public void whitespace() {
if (lastChar != 0 && lastChar != ' ') {
- append(' ');
+ write(' ');
}
}
public void colon() {
if (lastChar != 0 && lastChar != ':') {
- append(':');
+ write(':');
}
}
public void line() {
- if (lastChar != 0 && lastChar != '\n') {
- append('\n');
- }
+ append('\n');
+ writePrefixes();
+ }
+
+ public void block() {
+ blockSeparator = lineBreakRendering == LineBreakRendering.STRIP ? " " : //
+ lineBreakRendering == LineBreakRendering.COMPACT || isTight() ? "\n" : "\n\n";
+ }
+
+ public void resetBlock() {
+ blockSeparator = null;
}
public void writeStripped(String s) {
- append(s.replaceAll("[\\r\\n\\s]+", " "));
+ write(s.replaceAll("[\\r\\n\\s]+", " "));
}
public void write(String s) {
+ flushBlockSeparator();
append(s);
}
public void write(char c) {
+ flushBlockSeparator();
append(c);
}
+ /**
+ * Push a prefix onto the top of the stack. All prefixes are written at the beginning of each line, until the
+ * prefix is popped again.
+ *
+ * @param prefix the raw prefix string
+ */
+ public void pushPrefix(String prefix) {
+ prefixes.addLast(prefix);
+ }
+
+ /**
+ * Write a prefix.
+ *
+ * @param prefix the raw prefix string to write
+ */
+ public void writePrefix(String prefix) {
+ write(prefix);
+ }
+
+ /**
+ * Remove the last prefix from the top of the stack.
+ */
+ public void popPrefix() {
+ prefixes.removeLast();
+ }
+
+ /**
+ * Change whether blocks are tight or loose. Loose is the default where blocks are separated by a blank line. Tight
+ * is where blocks are not separated by a blank line. Tight blocks are used in lists, if there are no blank lines
+ * within the list.
+ *
+ * Note that changing this does not affect block separators that have already been enqueued with {@link #block()},
+ * only future ones.
+ */
+ public void pushTight(boolean tight) {
+ this.tight.addLast(tight);
+ }
+
+ /**
+ * Remove the last "tight" setting from the top of the stack.
+ */
+ public void popTight() {
+ this.tight.removeLast();
+ }
+
+ private boolean isTight() {
+ return !tight.isEmpty() && tight.getLast();
+ }
+
+ private void writePrefixes() {
+ for (String prefix : prefixes) {
+ append(prefix);
+ }
+ }
+
+ /**
+ * If a block separator has been enqueued with {@link #block()} but not yet written, write it now.
+ */
+ private void flushBlockSeparator() {
+ if (blockSeparator != null) {
+ if (blockSeparator.equals("\n") || blockSeparator.equals("\n\n")) {
+ for (int i = 0; i < blockSeparator.length(); i++) {
+ var sep = blockSeparator.charAt(i);
+ append(sep);
+ writePrefixes();
+ }
+ } else {
+ append(blockSeparator);
+ }
+ blockSeparator = null;
+ }
+ }
+
private void append(String s) {
try {
buffer.append(s);
diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java b/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java
index 07a558091..8309f4bd6 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java
@@ -1,4 +1,4 @@
/**
- * Text content rendering (see {@link org.commonmark.renderer.text.TextContentRenderer})
+ * Plain text rendering with minimal markup (see {@link org.commonmark.renderer.text.TextContentRenderer})
*/
package org.commonmark.renderer.text;
diff --git a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java b/commonmark/src/main/java/org/commonmark/text/AsciiMatcher.java
similarity index 65%
rename from commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java
rename to commonmark/src/main/java/org/commonmark/text/AsciiMatcher.java
index 82d83ca46..0d9cea458 100644
--- a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java
+++ b/commonmark/src/main/java/org/commonmark/text/AsciiMatcher.java
@@ -1,7 +1,11 @@
-package org.commonmark.internal.util;
+package org.commonmark.text;
import java.util.BitSet;
+import java.util.Set;
+/**
+ * Char matcher that can match ASCII characters efficiently.
+ */
public class AsciiMatcher implements CharMatcher {
private final BitSet set;
@@ -22,6 +26,10 @@ public static Builder builder() {
return new Builder(new BitSet());
}
+ public static Builder builder(AsciiMatcher matcher) {
+ return new Builder((BitSet) matcher.set.clone());
+ }
+
public static class Builder {
private final BitSet set;
@@ -37,6 +45,20 @@ public Builder c(char c) {
return this;
}
+ public Builder anyOf(String s) {
+ for (int i = 0; i < s.length(); i++) {
+ c(s.charAt(i));
+ }
+ return this;
+ }
+
+ public Builder anyOf(Set<Character> characters) {
+ for (Character c : characters) {
+ c(c);
+ }
+ return this;
+ }
+
public Builder range(char from, char toInclusive) {
for (char c = from; c <= toInclusive; c++) {
c(c);
diff --git a/commonmark/src/main/java/org/commonmark/text/CharMatcher.java b/commonmark/src/main/java/org/commonmark/text/CharMatcher.java
new file mode 100644
index 000000000..2833e65c3
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/text/CharMatcher.java
@@ -0,0 +1,13 @@
+package org.commonmark.text;
+
+/**
+ * Matcher interface for {@code char} values.
+ *
+ * Note that because this matches on {@code char} values only (as opposed to {@code int} code points),
+ * this only operates on the level of code units and doesn't support supplementary characters
+ * (see {@link Character#isSupplementaryCodePoint(int)}).
+ */
+public interface CharMatcher {
+
+ boolean matches(char c);
+}
diff --git a/commonmark/src/main/java/org/commonmark/text/Characters.java b/commonmark/src/main/java/org/commonmark/text/Characters.java
new file mode 100644
index 000000000..ee56ca67e
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/text/Characters.java
@@ -0,0 +1,157 @@
+package org.commonmark.text;
+
+/**
+ * Functions for finding characters in strings or checking characters.
+ */
+public class Characters {
+
+ public static int find(char c, CharSequence s, int startIndex) {
+ int length = s.length();
+ for (int i = startIndex; i < length; i++) {
+ if (s.charAt(i) == c) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ public static int findLineBreak(CharSequence s, int startIndex) {
+ int length = s.length();
+ for (int i = startIndex; i < length; i++) {
+ switch (s.charAt(i)) {
+ case '\n':
+ case '\r':
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * @see blank line
+ */
+ public static boolean isBlank(CharSequence s) {
+ return skipSpaceTab(s, 0, s.length()) == s.length();
+ }
+
+ public static boolean hasNonSpace(CharSequence s) {
+ int length = s.length();
+ int skipped = skip(' ', s, 0, length);
+ return skipped != length;
+ }
+
+ public static boolean isLetter(CharSequence s, int index) {
+ int codePoint = Character.codePointAt(s, index);
+ return Character.isLetter(codePoint);
+ }
+
+ public static boolean isSpaceOrTab(CharSequence s, int index) {
+ if (index < s.length()) {
+ switch (s.charAt(index)) {
+ case ' ':
+ case '\t':
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * @see Unicode punctuation character
+ */
+ public static boolean isPunctuationCodePoint(int codePoint) {
+ switch (Character.getType(codePoint)) {
+ // General category "P" (punctuation)
+ case Character.DASH_PUNCTUATION:
+ case Character.START_PUNCTUATION:
+ case Character.END_PUNCTUATION:
+ case Character.CONNECTOR_PUNCTUATION:
+ case Character.OTHER_PUNCTUATION:
+ case Character.INITIAL_QUOTE_PUNCTUATION:
+ case Character.FINAL_QUOTE_PUNCTUATION:
+ // General category "S" (symbol)
+ case Character.MATH_SYMBOL:
+ case Character.CURRENCY_SYMBOL:
+ case Character.MODIFIER_SYMBOL:
+ case Character.OTHER_SYMBOL:
+ return true;
+ default:
+ switch (codePoint) {
+ case '$':
+ case '+':
+ case '<':
+ case '=':
+ case '>':
+ case '^':
+ case '`':
+ case '|':
+ case '~':
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+
+ /**
+ * Check whether the provided code point is a Unicode whitespace character as defined in the spec.
+ *
+ * @see Unicode whitespace character
+ */
+ public static boolean isWhitespaceCodePoint(int codePoint) {
+ switch (codePoint) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ return true;
+ default:
+ return Character.getType(codePoint) == Character.SPACE_SEPARATOR;
+ }
+ }
+
+ public static int skip(char skip, CharSequence s, int startIndex, int endIndex) {
+ for (int i = startIndex; i < endIndex; i++) {
+ if (s.charAt(i) != skip) {
+ return i;
+ }
+ }
+ return endIndex;
+ }
+
+ public static int skipBackwards(char skip, CharSequence s, int startIndex, int lastIndex) {
+ for (int i = startIndex; i >= lastIndex; i--) {
+ if (s.charAt(i) != skip) {
+ return i;
+ }
+ }
+ return lastIndex - 1;
+ }
+
+ public static int skipSpaceTab(CharSequence s, int startIndex, int endIndex) {
+ for (int i = startIndex; i < endIndex; i++) {
+ switch (s.charAt(i)) {
+ case ' ':
+ case '\t':
+ break;
+ default:
+ return i;
+ }
+ }
+ return endIndex;
+ }
+
+ public static int skipSpaceTabBackwards(CharSequence s, int startIndex, int lastIndex) {
+ for (int i = startIndex; i >= lastIndex; i--) {
+ switch (s.charAt(i)) {
+ case ' ':
+ case '\t':
+ break;
+ default:
+ return i;
+ }
+ }
+ return lastIndex - 1;
+ }
+}
diff --git a/commonmark/src/main/java/org/commonmark/text/package-info.java b/commonmark/src/main/java/org/commonmark/text/package-info.java
new file mode 100644
index 000000000..ab9eec6f1
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/text/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Text processing utilities for parsing and rendering, exported for use by extensions
+ */
+package org.commonmark.text;
diff --git a/commonmark/src/main/resources/META-INF/LICENSE.txt b/commonmark/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark/src/main/resources/org/commonmark/internal/util/entities.properties b/commonmark/src/main/resources/org/commonmark/internal/util/entities.txt
similarity index 100%
rename from commonmark/src/main/resources/org/commonmark/internal/util/entities.properties
rename to commonmark/src/main/resources/org/commonmark/internal/util/entities.txt
diff --git a/commonmark/src/test/java/org/commonmark/ProfilingMain.java b/commonmark/src/test/java/org/commonmark/ProfilingMain.java
index 31ae2b5f5..83b1bdaff 100644
--- a/commonmark/src/test/java/org/commonmark/ProfilingMain.java
+++ b/commonmark/src/test/java/org/commonmark/ProfilingMain.java
@@ -6,7 +6,6 @@
import org.commonmark.testutil.TestResources;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
public class ProfilingMain {
@@ -20,7 +19,7 @@ public static void main(String[] args) throws Exception {
System.out.println("Attach profiler, then press enter to start parsing.");
System.in.read();
System.out.println("Parsing");
- List nodes = parse(Collections.singletonList(SPEC));
+ List nodes = parse(List.of(SPEC));
System.out.println("Finished parsing, press enter to start rendering");
System.in.read();
System.out.println(render(nodes));
diff --git a/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java b/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java
index c4d848362..a834665ff 100644
--- a/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java
+++ b/commonmark/src/test/java/org/commonmark/internal/DocumentParserTest.java
@@ -2,20 +2,16 @@
import org.commonmark.node.*;
import org.commonmark.parser.block.BlockParserFactory;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.util.Arrays;
-import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
import java.util.Set;
-import java.util.HashSet;
-import static org.hamcrest.CoreMatchers.is;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
+import static org.assertj.core.api.Assertions.assertThat;
-public class DocumentParserTest {
- private static List CORE_FACTORIES = Arrays.asList(
+class DocumentParserTest {
+ private static final List CORE_FACTORIES = List.of(
new BlockQuoteParser.Factory(),
new HeadingParser.Factory(),
new FencedCodeBlockParser.Factory(),
@@ -25,28 +21,28 @@ public class DocumentParserTest {
new IndentedCodeBlockParser.Factory());
@Test
- public void calculateBlockParserFactories_givenAFullListOfAllowedNodes_includesAllCoreFactories() {
- List customParserFactories = Collections.emptyList();
- Set> nodes = new HashSet<>(Arrays.asList(BlockQuote.class, Heading.class, FencedCodeBlock.class, HtmlBlock.class, ThematicBreak.class, ListBlock.class, IndentedCodeBlock.class));
+ void calculateBlockParserFactories_givenAFullListOfAllowedNodes_includesAllCoreFactories() {
+ List customParserFactories = List.of();
+ var enabledBlockTypes = Set.of(BlockQuote.class, Heading.class, FencedCodeBlock.class, HtmlBlock.class, ThematicBreak.class, ListBlock.class, IndentedCodeBlock.class);
- List blockParserFactories = DocumentParser.calculateBlockParserFactories(customParserFactories, nodes);
- assertThat(blockParserFactories.size(), is(CORE_FACTORIES.size()));
+ List blockParserFactories = DocumentParser.calculateBlockParserFactories(customParserFactories, enabledBlockTypes);
+ assertThat(blockParserFactories).hasSameSizeAs(CORE_FACTORIES);
for (BlockParserFactory factory : CORE_FACTORIES) {
- assertTrue(hasInstance(blockParserFactories, factory.getClass()));
+ assertThat(hasInstance(blockParserFactories, factory.getClass())).isTrue();
}
}
@Test
- public void calculateBlockParserFactories_givenAListOfAllowedNodes_includesAssociatedFactories() {
- List customParserFactories = Collections.emptyList();
+ void calculateBlockParserFactories_givenAListOfAllowedNodes_includesAssociatedFactories() {
+ List customParserFactories = List.of();
Set