+ * The {@link #getLabel() label} is the text in brackets after {@code ^}, so {@code foo} in the example. The contents
+ * of the footnote are child nodes of the definition, a {@link org.commonmark.node.Paragraph} in the example.
+ *
+ * Footnote definitions are parsed even if there's no corresponding {@link FootnoteReference}.
+ */
+public class FootnoteDefinition extends CustomBlock {
+
+    /**
+     * The text in brackets after {@code ^}. Never reassigned after construction.
+     */
+    private final String label;
+
+    /**
+     * @param label the text in brackets after {@code ^}
+     */
+    public FootnoteDefinition(String label) {
+        this.label = label;
+    }
+
+    /**
+     * @return the label this definition was created with
+     */
+    public String getLabel() {
+        return label;
+    }
+}
+
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java
new file mode 100644
index 000000000..61dcf8626
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnoteReference.java
@@ -0,0 +1,21 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.node.CustomNode;
+
+/**
+ * A footnote reference, e.g. {@code [^foo]} in {@code Some text with a footnote[^foo]}.
+ * <p>
+ * The {@link #getLabel() label} is the text within brackets after {@code ^}, so {@code foo} in the example. It needs to
+ * match the label of a corresponding {@link FootnoteDefinition} for the footnote to be parsed.
+ */
+public class FootnoteReference extends CustomNode {
+
+    /**
+     * The text within brackets after {@code ^}. Never reassigned after construction.
+     */
+    private final String label;
+
+    /**
+     * @param label the text within brackets after {@code ^}
+     */
+    public FootnoteReference(String label) {
+        this.label = label;
+    }
+
+    /**
+     * @return the label this reference was created with
+     */
+    public String getLabel() {
+        return label;
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java
new file mode 100644
index 000000000..dd532fa34
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/FootnotesExtension.java
@@ -0,0 +1,105 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.Extension;
+import org.commonmark.ext.footnotes.internal.*;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+
+import java.util.Set;
+
+/**
+ * Extension for footnotes with syntax like GitHub Flavored Markdown:
+ * <pre><code>
+ * Some text with a footnote[^1].
+ *
+ * [^1]: The text of the footnote.
+ * </code></pre>
+ * The {@code [^1]} is a {@link FootnoteReference}, with "1" being the label.
+ * <p>
+ * The line with {@code [^1]: ...} is a {@link FootnoteDefinition}, with the contents as child nodes (can be a
+ * paragraph like in the example, or other blocks like lists).
+ * <p>
+ * All the footnotes (definitions) will be rendered in a list at the end of a document, no matter where they appear in
+ * the source. The footnotes will be numbered starting from 1, then 2, etc, depending on the order in which they appear
+ * in the text (and not dependent on the label). The footnote reference is a link to the footnote, and from the footnote
+ * there is a link back to the reference (or multiple).
+ * <p>
+ * There is also optional support for inline footnotes, use {@link #builder()} and then set {@link Builder#inlineFootnotes}.
+ *
+ * @see <a href="https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#footnotes">GitHub docs for footnotes</a>
+ */
+public class FootnotesExtension implements Parser.ParserExtension,
+        HtmlRenderer.HtmlRendererExtension,
+        MarkdownRenderer.MarkdownRendererExtension {
+
+    private final boolean inlineFootnotes;
+
+    private FootnotesExtension(boolean inlineFootnotes) {
+        this.inlineFootnotes = inlineFootnotes;
+    }
+
+    /**
+     * The extension with the default configuration (no support for inline footnotes).
+     */
+    public static Extension create() {
+        return builder().build();
+    }
+
+    /**
+     * @return a new builder for configuring the extension; inline footnotes are disabled unless enabled on it
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Registers the footnote definition block parser and the footnote link processor. The {@code ^} link marker
+     * (needed for inline footnotes) is only registered when inline footnotes are enabled.
+     */
+    @Override
+    public void extend(Parser.Builder parserBuilder) {
+        parserBuilder
+                .customBlockParserFactory(new FootnoteBlockParser.Factory())
+                .linkProcessor(new FootnoteLinkProcessor());
+        if (inlineFootnotes) {
+            parserBuilder.linkMarker('^');
+        }
+    }
+
+    /**
+     * Registers the HTML node renderer for footnote nodes.
+     */
+    @Override
+    public void extend(HtmlRenderer.Builder rendererBuilder) {
+        rendererBuilder.nodeRendererFactory(FootnoteHtmlNodeRenderer::new);
+    }
+
+    /**
+     * Registers the Markdown node renderer for footnote nodes. No additional special characters are declared.
+     */
+    @Override
+    public void extend(MarkdownRenderer.Builder rendererBuilder) {
+        rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+            @Override
+            public NodeRenderer create(MarkdownNodeRendererContext context) {
+                return new FootnoteMarkdownNodeRenderer(context);
+            }
+
+            @Override
+            public Set<Character> getSpecialCharacters() {
+                return Set.of();
+            }
+        });
+    }
+
+    /**
+     * Builder for a configured {@link FootnotesExtension}.
+     */
+    public static class Builder {
+
+        private boolean inlineFootnotes = false;
+
+        /**
+         * Enable support for inline footnotes without definitions, e.g.:
+         * <pre>
+         * Some text^[this is an inline footnote]
+         * </pre>
+         *
+         * @param inlineFootnotes whether inline footnotes should be parsed
+         * @return {@code this}
+         */
+        public Builder inlineFootnotes(boolean inlineFootnotes) {
+            this.inlineFootnotes = inlineFootnotes;
+            return this;
+        }
+
+        /**
+         * @return an extension using the settings configured on this builder
+         */
+        public FootnotesExtension build() {
+            return new FootnotesExtension(inlineFootnotes);
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java
new file mode 100644
index 000000000..665d01936
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/InlineFootnote.java
@@ -0,0 +1,6 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.node.CustomNode;
+
+/**
+ * An inline footnote, e.g. {@code ^[this is an inline footnote]} in
+ * {@code Some text^[this is an inline footnote]}.
+ * <p>
+ * The node itself carries no data. Inline footnotes are only parsed when enabled via
+ * {@link FootnotesExtension.Builder#inlineFootnotes}.
+ */
+public class InlineFootnote extends CustomNode {
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java
new file mode 100644
index 000000000..110bdef20
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteBlockParser.java
@@ -0,0 +1,105 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.node.Block;
+import org.commonmark.node.DefinitionMap;
+import org.commonmark.parser.block.*;
+import org.commonmark.text.Characters;
+
+import java.util.List;
+
+/**
+ * Parser for a single {@link FootnoteDefinition} block.
+ */
+public class FootnoteBlockParser extends AbstractBlockParser {
+
+    private final FootnoteDefinition block;
+
+    /**
+     * @param label the label of the definition being parsed (the text between {@code [^} and {@code ]:})
+     */
+    public FootnoteBlockParser(String label) {
+        block = new FootnoteDefinition(label);
+    }
+
+    @Override
+    public Block getBlock() {
+        return block;
+    }
+
+    @Override
+    public boolean isContainer() {
+        return true;
+    }
+
+    @Override
+    public boolean canContain(Block childBlock) {
+        return true;
+    }
+
+    @Override
+    public BlockContinue tryContinue(ParserState parserState) {
+        if (parserState.getIndent() >= 4) {
+            // It looks like content needs to be indented by 4 so that it's part of a footnote (instead of starting a new block).
+            return BlockContinue.atColumn(4);
+        } else if (parserState.isBlank()) {
+            // A blank line doesn't finish a footnote yet. If there's another line with indent >= 4 after it,
+            // that should result in another paragraph in this footnote definition.
+            return BlockContinue.atIndex(parserState.getIndex());
+        } else {
+            // We're not continuing to give other block parsers a chance to interrupt this definition.
+            // But if no other block parser applied (including another FootnoteBlockParser), we will
+            // accept the line via lazy continuation (same as a block quote).
+            return BlockContinue.none();
+        }
+    }
+
+    /**
+     * Exposes the parsed definition under its label so that references can be resolved against it.
+     */
+    @Override
+    public List<DefinitionMap<?>> getDefinitions() {
+        var map = new DefinitionMap<>(FootnoteDefinition.class);
+        map.putIfAbsent(block.getLabel(), block);
+        return List.of(map);
+    }
+
+    /**
+     * Starts a {@link FootnoteBlockParser} on lines of the form {@code [^label]: ...}.
+     */
+    public static class Factory implements BlockParserFactory {
+
+        @Override
+        public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
+            if (state.getIndent() >= 4) {
+                return BlockStart.none();
+            }
+            var content = state.getLine().getContent();
+            var length = content.length();
+            var index = state.getNextNonSpaceIndex();
+            // We need "[^" followed by at least one more character. The length is checked before any charAt so
+            // that we never read past the end of the line.
+            if (index + 2 >= length || content.charAt(index) != '[' || content.charAt(index + 1) != '^') {
+                return BlockStart.none();
+            }
+            // Now at first label character (if any)
+            var labelStart = index + 2;
+
+            for (index = labelStart; index < length; index++) {
+                var c = content.charAt(index);
+                switch (c) {
+                    case ']':
+                        // The label must be non-empty and the closing bracket must be directly followed by a colon.
+                        if (index > labelStart && index + 1 < length && content.charAt(index + 1) == ':') {
+                            var label = content.subSequence(labelStart, index).toString();
+                            // After the colon, any number of spaces is skipped (not part of the content)
+                            var afterSpaces = Characters.skipSpaceTab(content, index + 2, length);
+                            return BlockStart.of(new FootnoteBlockParser(label)).atIndex(afterSpaces);
+                        } else {
+                            return BlockStart.none();
+                        }
+                    case ' ':
+                    case '\r':
+                    case '\n':
+                    case '\0':
+                    case '\t':
+                        // These characters are not allowed inside a label
+                        return BlockStart.none();
+                }
+            }
+
+            // No closing bracket found on this line
+            return BlockStart.none();
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java
new file mode 100644
index 000000000..70eb048a3
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteHtmlNodeRenderer.java
@@ -0,0 +1,391 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.ext.footnotes.FootnoteReference;
+import org.commonmark.ext.footnotes.InlineFootnote;
+import org.commonmark.node.*;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlWriter;
+
+import java.util.*;
+import java.util.function.Consumer;
+
+/**
+ * HTML rendering for footnotes.
+ * <p>
+ * Aims to match the rendering of cmark-gfm (which is slightly different from GitHub's when it comes to class
+ * attributes, not sure why).
+ * <p>
+ * Some notes on how rendering works:
+ * <ul>
+ * <li>Footnotes are numbered according to the order of references, starting at 1</li>
+ * <li>Definitions are rendered at the end of the document, regardless of where the definition was in the source</li>
+ * <li>Definitions are ordered by number</li>
+ * <li>Definitions have links back to their references (one or more)</li>
+ * </ul>
+ *
+ * <h4>Nested footnotes</h4>
+ * Text in footnote definitions can reference other footnotes, even ones that aren't referenced in the main text.
+ * This makes them tricky because it's not enough to just go through the main text for references.
+ * And before we can render a definition, we need to know all references (because we add links back to references).
+ * <p>
+ * In other words, footnotes form a directed graph. Footnotes can reference each other so cycles are possible too.
+ * <p>
+ * One way to implement it, which is what cmark-gfm does, is to go through the whole document (including definitions)
+ * and find all references in order. That guarantees that all definitions are found, but it has strange results for
+ * ordering or when the reference is in an unreferenced definition, see tests. In graph terms, it renders all
+ * definitions that have an incoming edge, no matter whether they are connected to the main text or not.
+ * <p>
+ * The way we implement it:
+ * <ol>
+ * <li>Start with the references in the main text; we can render them as we go</li>
+ * <li>After the main text is rendered, we have the referenced definitions, but there might be more from definition text</li>
+ * <li>To find the remaining definitions, we visit the definitions from before to look at references</li>
+ * <li>Repeat (breadth-first search) until we've found all definitions (note that we can't render before that's done because of backrefs)</li>
+ * <li>Now render the definitions (and any references inside)</li>
+ * </ol>
+ * This means we only render definitions whose references are actually rendered, and in a meaningful order (all main
+ * text footnotes first, then any nested ones).
+ */
+public class FootnoteHtmlNodeRenderer implements NodeRenderer {
+
+    private final HtmlWriter html;
+    private final HtmlNodeRendererContext context;
+
+    /**
+     * All definitions (even potentially unused ones), for looking up references
+     */
+    private DefinitionMap<FootnoteDefinition> definitionMap;
+
+    /**
+     * Definitions that were referenced, in order in which they should be rendered.
+     */
+    private final Map<Node, ReferencedDefinition> referencedDefinitions = new LinkedHashMap<>();
+
+    /**
+     * Information about references that should be rendered as footnotes. This doesn't contain all references, just the
+     * ones from inside definitions.
+     */
+    private final Map<Node, ReferenceInfo> references = new HashMap<>();
+
+    public FootnoteHtmlNodeRenderer(HtmlNodeRendererContext context) {
+        this.html = context.getWriter();
+        this.context = context;
+    }
+
+    @Override
+    public Set<Class<? extends Node>> getNodeTypes() {
+        return Set.of(FootnoteReference.class, InlineFootnote.class, FootnoteDefinition.class);
+    }
+
+    @Override
+    public void beforeRoot(Node rootNode) {
+        // Collect all definitions first, so we can look them up when encountering a reference later.
+        var visitor = new DefinitionVisitor();
+        rootNode.accept(visitor);
+        definitionMap = visitor.definitions;
+    }
+
+    @Override
+    public void render(Node node) {
+        if (node instanceof FootnoteReference) {
+            // This is called for all references, even ones inside definitions that we render at the end.
+            // Inside definitions, we have registered the reference already.
+            var ref = (FootnoteReference) node;
+            // Use containsKey because if value is null, we don't need to try registering again.
+            var info = references.containsKey(ref) ? references.get(ref) : tryRegisterReference(ref);
+            if (info != null) {
+                renderReference(ref, info);
+            } else {
+                // A reference without a corresponding definition is rendered as plain text
+                html.text("[^" + ref.getLabel() + "]");
+            }
+        } else if (node instanceof InlineFootnote) {
+            var info = references.get(node);
+            if (info == null) {
+                info = registerReference(node, null);
+            }
+            renderReference(node, info);
+        }
+        // FootnoteDefinition nodes produce no output here; the referenced ones are rendered in afterRoot.
+    }
+
+    @Override
+    public void afterRoot(Node rootNode) {
+        // Now render the referenced definitions if there are any.
+        if (referencedDefinitions.isEmpty()) {
+            return;
+        }
+
+        var firstDef = referencedDefinitions.keySet().iterator().next();
+        var attrs = new LinkedHashMap<String, String>();
+        attrs.put("class", "footnotes");
+        attrs.put("data-footnotes", null);
+        html.tag("section", context.extendAttributes(firstDef, "section", attrs));
+        html.line();
+        html.tag("ol");
+        html.line();
+
+        // Check whether there are any footnotes inside the definitions that we're about to render. For those, we might
+        // need to render more definitions. So do a breadth-first search to find all relevant definitions.
+        // The queue is a copy of the key set, so registering more definitions while visiting doesn't disturb it.
+        Deque<Node> check = new ArrayDeque<>(referencedDefinitions.keySet());
+        while (!check.isEmpty()) {
+            var def = check.removeFirst();
+            def.accept(new ShallowReferenceVisitor(def, node -> {
+                if (node instanceof FootnoteReference) {
+                    var ref = (FootnoteReference) node;
+                    var d = definitionMap.get(ref.getLabel());
+                    if (d != null) {
+                        // Only queue definitions we haven't seen yet; this is also what terminates cycles.
+                        if (!referencedDefinitions.containsKey(d)) {
+                            check.addLast(d);
+                        }
+                        references.put(ref, registerReference(d, d.getLabel()));
+                    }
+                } else if (node instanceof InlineFootnote) {
+                    check.addLast(node);
+                    references.put(node, registerReference(node, null));
+                }
+            }));
+        }
+
+        for (var entry : referencedDefinitions.entrySet()) {
+            // This will also render any footnote references inside definitions
+            renderDefinition(entry.getKey(), entry.getValue());
+        }
+
+        html.tag("/ol");
+        html.line();
+        html.tag("/section");
+        html.line();
+    }
+
+    /**
+     * Registers the reference if there is a definition for its label.
+     *
+     * @return the info for rendering the reference, or {@code null} if there is no definition for the label
+     */
+    private ReferenceInfo tryRegisterReference(FootnoteReference ref) {
+        var def = definitionMap.get(ref.getLabel());
+        if (def == null) {
+            return null;
+        }
+        return registerReference(def, def.getLabel());
+    }
+
+    /**
+     * Records one more reference to the footnote represented by {@code node}, assigning the footnote a number if this
+     * is its first reference.
+     *
+     * @param node  the definition (or the inline footnote itself) being referenced
+     * @param label the label of the definition, or {@code null} for inline footnotes
+     */
+    private ReferenceInfo registerReference(Node node, String label) {
+        // The first referenced definition gets number 1, second one 2, etc.
+        var referencedDef = referencedDefinitions.computeIfAbsent(node, k -> {
+            var num = referencedDefinitions.size() + 1;
+            var key = definitionKey(label, num);
+            return new ReferencedDefinition(num, key);
+        });
+        var definitionNumber = referencedDef.definitionNumber;
+        // The reference number for that particular definition. E.g. if there's two references for the same definition,
+        // the first one is 1, the second one 2, etc. This is needed to give each reference a unique ID so that each
+        // reference can get its own backlink from the definition.
+        var refNumber = referencedDef.references.size() + 1;
+        var definitionKey = referencedDef.definitionKey;
+        var id = referenceId(definitionKey, refNumber);
+        referencedDef.references.add(id);
+
+        return new ReferenceInfo(id, definitionId(definitionKey), definitionNumber);
+    }
+
+    /**
+     * Renders the superscript link from a reference to its definition.
+     */
+    private void renderReference(Node node, ReferenceInfo referenceInfo) {
+        html.tag("sup", context.extendAttributes(node, "sup", Map.of("class", "footnote-ref")));
+
+        var href = "#" + referenceInfo.definitionId;
+        var attrs = new LinkedHashMap<String, String>();
+        attrs.put("href", href);
+        attrs.put("id", referenceInfo.id);
+        attrs.put("data-footnote-ref", null);
+        html.tag("a", context.extendAttributes(node, "a", attrs));
+        html.raw(String.valueOf(referenceInfo.definitionNumber));
+        html.tag("/a");
+        html.tag("/sup");
+    }
+
+    /**
+     * Renders one list item for a referenced definition (or inline footnote), including its backlinks.
+     */
+    private void renderDefinition(Node def, ReferencedDefinition referencedDefinition) {
+        var attrs = new LinkedHashMap<String, String>();
+        attrs.put("id", definitionId(referencedDefinition.definitionKey));
+        html.tag("li", context.extendAttributes(def, "li", attrs));
+        html.line();
+
+        if (def.getLastChild() instanceof Paragraph) {
+            // Add backlinks into last paragraph before </p>. This is what GFM does.
+            var lastParagraph = (Paragraph) def.getLastChild();
+            var node = def.getFirstChild();
+            while (node != lastParagraph) {
+                if (node instanceof Paragraph) {
+                    // Because we're manually rendering the <p> for the last paragraph, do the same for all other
+                    // paragraphs for consistency (Paragraph rendering might be overwritten by a custom renderer).
+                    html.tag("p", context.extendAttributes(node, "p", Map.of()));
+                    renderChildren(node);
+                    html.tag("/p");
+                    html.line();
+                } else {
+                    context.render(node);
+                }
+                node = node.getNext();
+            }
+
+            html.tag("p", context.extendAttributes(lastParagraph, "p", Map.of()));
+            renderChildren(lastParagraph);
+            html.raw(" ");
+            renderBackrefs(def, referencedDefinition);
+            html.tag("/p");
+            html.line();
+        } else if (def instanceof InlineFootnote) {
+            // An inline footnote has inline children only, so wrap them in a paragraph ourselves.
+            html.tag("p", context.extendAttributes(def, "p", Map.of()));
+            renderChildren(def);
+            html.raw(" ");
+            renderBackrefs(def, referencedDefinition);
+            html.tag("/p");
+            html.line();
+        } else {
+            // Last child is not a paragraph (or there are no children): put the backlinks after the content.
+            renderChildren(def);
+            html.line();
+            renderBackrefs(def, referencedDefinition);
+        }
+
+        html.tag("/li");
+        html.line();
+    }
+
+    /**
+     * Renders one backlink per registered reference of the definition, separated by spaces.
+     */
+    private void renderBackrefs(Node def, ReferencedDefinition referencedDefinition) {
+        var refs = referencedDefinition.references;
+        for (int i = 0; i < refs.size(); i++) {
+            var ref = refs.get(i);
+            var refNumber = i + 1;
+            // E.g. "1" for the first reference to definition 1, "1-2" for the second one.
+            var idx = referencedDefinition.definitionNumber + (refNumber > 1 ? ("-" + refNumber) : "");
+
+            var attrs = new LinkedHashMap<String, String>();
+            attrs.put("href", "#" + ref);
+            attrs.put("class", "footnote-backref");
+            attrs.put("data-footnote-backref", null);
+            attrs.put("data-footnote-backref-idx", idx);
+            attrs.put("aria-label", "Back to reference " + idx);
+            html.tag("a", context.extendAttributes(def, "a", attrs));
+            if (refNumber > 1) {
+                html.tag("sup", context.extendAttributes(def, "sup", Map.of("class", "footnote-ref")));
+                html.raw(String.valueOf(refNumber));
+                html.tag("/sup");
+            }
+            // U+21A9 LEFTWARDS ARROW WITH HOOK
+            html.raw("\u21A9");
+            html.tag("/a");
+            if (i + 1 < refs.size()) {
+                html.raw(" ");
+            }
+        }
+    }
+
+    private String referenceId(String definitionKey, int number) {
+        return "fnref" + definitionKey + (number == 1 ? "" : ("-" + number));
+    }
+
+    private String definitionKey(String label, int number) {
+        // Named definitions use the pattern "fn-{name}" and inline definitions use "fn{number}" so as not to conflict.
+        // "fn{number}" is also what pandoc uses (for all types), starting with number 1.
+        if (label != null) {
+            return "-" + label;
+        } else {
+            return String.valueOf(number);
+        }
+    }
+
+    private String definitionId(String definitionKey) {
+        return "fn" + definitionKey;
+    }
+
+    private void renderChildren(Node parent) {
+        Node node = parent.getFirstChild();
+        while (node != null) {
+            // Read the next sibling before rendering the current node
+            Node next = node.getNext();
+            context.render(node);
+            node = next;
+        }
+    }
+
+    /**
+     * Collects all footnote definitions in the document by label; for duplicate labels the first one wins.
+     */
+    private static class DefinitionVisitor extends AbstractVisitor {
+
+        private final DefinitionMap<FootnoteDefinition> definitions = new DefinitionMap<>(FootnoteDefinition.class);
+
+        @Override
+        public void visit(CustomBlock customBlock) {
+            if (customBlock instanceof FootnoteDefinition) {
+                var def = (FootnoteDefinition) customBlock;
+                definitions.putIfAbsent(def.getLabel(), def);
+            } else {
+                super.visit(customBlock);
+            }
+        }
+    }
+
+    /**
+     * Visit footnote references/inline footnotes inside the parent (but not the parent itself). We want a shallow visit
+     * because the caller wants to control when to descend.
+     */
+    private static class ShallowReferenceVisitor extends AbstractVisitor {
+        private final Node parent;
+        private final Consumer<Node> consumer;
+
+        private ShallowReferenceVisitor(Node parent, Consumer<Node> consumer) {
+            this.parent = parent;
+            this.consumer = consumer;
+        }
+
+        @Override
+        public void visit(CustomNode customNode) {
+            if (customNode instanceof FootnoteReference) {
+                consumer.accept(customNode);
+            } else if (customNode instanceof InlineFootnote) {
+                if (customNode == parent) {
+                    // Descend into the parent (inline footnotes can contain inline footnotes)
+                    super.visit(customNode);
+                } else {
+                    // Don't descend here because we want to be shallow.
+                    consumer.accept(customNode);
+                }
+            } else {
+                super.visit(customNode);
+            }
+        }
+    }
+
+    private static class ReferencedDefinition {
+        /**
+         * The definition number, starting from 1, and in order in which they're referenced.
+         */
+        final int definitionNumber;
+        /**
+         * The unique key of the definition. Together with a static prefix it forms the ID used in the HTML.
+         */
+        final String definitionKey;
+        /**
+         * The IDs of references for this definition, for backrefs.
+         */
+        final List<String> references = new ArrayList<>();
+
+        ReferencedDefinition(int definitionNumber, String definitionKey) {
+            this.definitionNumber = definitionNumber;
+            this.definitionKey = definitionKey;
+        }
+    }
+
+    private static class ReferenceInfo {
+        /**
+         * The ID of the reference; in the corresponding definition, a link back to this reference will be rendered.
+         */
+        private final String id;
+        /**
+         * The ID of the definition, for linking to the definition.
+         */
+        private final String definitionId;
+        /**
+         * The definition number, rendered in superscript.
+         */
+        private final int definitionNumber;
+
+        private ReferenceInfo(String id, String definitionId, int definitionNumber) {
+            this.id = id;
+            this.definitionId = definitionId;
+            this.definitionNumber = definitionNumber;
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java
new file mode 100644
index 000000000..07b008576
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteLinkProcessor.java
@@ -0,0 +1,57 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.ext.footnotes.FootnoteReference;
+import org.commonmark.ext.footnotes.InlineFootnote;
+import org.commonmark.node.LinkReferenceDefinition;
+import org.commonmark.parser.InlineParserContext;
+import org.commonmark.parser.beta.LinkInfo;
+import org.commonmark.parser.beta.LinkProcessor;
+import org.commonmark.parser.beta.LinkResult;
+import org.commonmark.parser.beta.Scanner;
+
+/**
+ * Link processor that recognizes footnote syntax: {@code [^foo]} is turned into a {@link FootnoteReference},
+ * and {@code ^[foo]} into an {@link InlineFootnote}.
+ */
+public class FootnoteLinkProcessor implements LinkProcessor {
+    @Override
+    public LinkResult process(LinkInfo linkInfo, Scanner scanner, InlineParserContext context) {
+        // Inline footnote, e.g. ^[footnote text]. The "^" marker is only reported to us if the option is enabled
+        // on the extension.
+        var marker = linkInfo.marker();
+        if (marker != null && marker.getLiteral().equals("^")) {
+            var afterBracket = linkInfo.afterTextBracket();
+            return LinkResult.wrapTextIn(new InlineFootnote(), afterBracket).includeMarker();
+        }
+
+        // An inline link (one with a destination) can never be a footnote reference.
+        if (linkInfo.destination() != null) {
+            return LinkResult.none();
+        }
+
+        // A footnote reference has to start with [^
+        var bracketText = linkInfo.text();
+        if (!bracketText.startsWith("^")) {
+            return LinkResult.none();
+        }
+
+        // A defined link label after the text wins: in `[^foo][bar]`, if `[bar]` has a definition, the whole thing
+        // is a link and `[^foo]` is not a footnote reference.
+        var linkLabel = linkInfo.label();
+        var labelIsDefinedLink = linkLabel != null
+                && context.getDefinition(LinkReferenceDefinition.class, linkLabel) != null;
+        if (labelIsDefinedLink) {
+            return LinkResult.none();
+        }
+
+        // Without a matching definition this is ignored (same behavior as for link reference definitions).
+        // The label syntax was already validated by the definition parser, so no need to check it again here.
+        var footnoteLabel = bracketText.substring(1);
+        if (context.getDefinition(FootnoteDefinition.class, footnoteLabel) == null) {
+            return LinkResult.none();
+        }
+
+        // Only the text part of the link is consumed, never the label part (if any). The marker is not included
+        // either: if the marker is `![`, we don't want the `!` to become part of the reference.
+        return LinkResult.replaceWith(new FootnoteReference(footnoteLabel), linkInfo.afterTextBracket());
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java
new file mode 100644
index 000000000..3dcf4fc83
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/java/org/commonmark/ext/footnotes/internal/FootnoteMarkdownNodeRenderer.java
@@ -0,0 +1,70 @@
+package org.commonmark.ext.footnotes.internal;
+
+import org.commonmark.ext.footnotes.FootnoteDefinition;
+import org.commonmark.ext.footnotes.FootnoteReference;
+import org.commonmark.ext.footnotes.InlineFootnote;
+import org.commonmark.node.*;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+
+import java.util.Set;
+
+/**
+ * Markdown rendering for footnotes: writes references, inline footnotes and definitions back out in their source
+ * syntax ({@code [^label]}, {@code ^[text]} and {@code [^label]: content}).
+ */
+public class FootnoteMarkdownNodeRenderer implements NodeRenderer {
+
+    private final MarkdownWriter writer;
+    private final MarkdownNodeRendererContext context;
+
+    public FootnoteMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+        this.writer = context.getWriter();
+        this.context = context;
+    }
+
+    @Override
+    public Set<Class<? extends Node>> getNodeTypes() {
+        return Set.of(FootnoteReference.class, InlineFootnote.class, FootnoteDefinition.class);
+    }
+
+    @Override
+    public void render(Node node) {
+        if (node instanceof FootnoteReference) {
+            renderReference((FootnoteReference) node);
+        } else if (node instanceof InlineFootnote) {
+            renderInline((InlineFootnote) node);
+        } else if (node instanceof FootnoteDefinition) {
+            renderDefinition((FootnoteDefinition) node);
+        }
+    }
+
+    /**
+     * Writes {@code [^label]}.
+     */
+    private void renderReference(FootnoteReference ref) {
+        writer.raw("[^");
+        // The label is parsed as-is without escaping, so we can render it back as-is
+        writer.raw(ref.getLabel());
+        writer.raw("]");
+    }
+
+    /**
+     * Writes {@code ^[} and {@code ]} around the rendered children of the inline footnote.
+     */
+    private void renderInline(InlineFootnote inlineFootnote) {
+        writer.raw("^[");
+        renderChildren(inlineFootnote);
+        writer.raw("]");
+    }
+
+    /**
+     * Writes {@code [^label]: } followed by the children, rendered with a pushed prefix.
+     */
+    private void renderDefinition(FootnoteDefinition def) {
+        writer.raw("[^");
+        writer.raw(def.getLabel());
+        writer.raw("]: ");
+
+        writer.pushPrefix(" ");
+        renderChildren(def);
+        writer.popPrefix();
+    }
+
+    private void renderChildren(Node parent) {
+        Node node = parent.getFirstChild();
+        while (node != null) {
+            // Read the next sibling before rendering the current node
+            Node next = node.getNext();
+            context.render(node);
+            node = next;
+        }
+    }
+}
diff --git a/commonmark-ext-footnotes/src/main/javadoc/overview.html b/commonmark-ext-footnotes/src/main/javadoc/overview.html
new file mode 100644
index 000000000..4f19d2115
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/javadoc/overview.html
@@ -0,0 +1,6 @@
+<html>
+<body>
+<b>Extension for footnotes using [^1] syntax</b>
+<p>See {@link org.commonmark.ext.footnotes.FootnotesExtension}</p>
+</body>
+</html>
diff --git a/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-footnotes/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java
new file mode 100644
index 000000000..bc7d4f74c
--- /dev/null
+++ b/commonmark-ext-footnotes/src/test/java/org/commonmark/ext/footnotes/FootnoteHtmlRendererTest.java
@@ -0,0 +1,339 @@
+package org.commonmark.ext.footnotes;
+
+import org.commonmark.Extension;
+import org.commonmark.node.Document;
+import org.commonmark.node.Paragraph;
+import org.commonmark.node.Text;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.testutil.Asserts;
+import org.commonmark.testutil.RenderingTestCase;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Set;
+
+public class FootnoteHtmlRendererTest extends RenderingTestCase {
+ private static final Set EXTENSIONS = Set.of(FootnotesExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void testOne() {
+ assertRendering("Test [^foo]\n\n[^foo]: note\n",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testLabelNormalization() {
+ // Labels match via their normalized form. For the href and IDs to match, rendering needs to use the
+ // label from the definition consistently.
+ assertRendering("Test [^bar]\n\n[^BAR]: note\n",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testMultipleReferences() {
+ // Tests a few things:
+ // - Numbering is based on the reference order, not the definition order
+ // - The same number is used when a definition is referenced multiple times
+ // - Multiple backrefs are rendered
+ assertRendering("First [^foo]\n\nThen [^bar]\n\nThen [^foo] again\n\n[^bar]: b\n[^foo]: f\n",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testNestedFootnotesOrder() {
+ // GitHub has a strange result here, the definitions are in order: 1. bar, 2. foo.
+ // The reason is that the numbering is done based on all references in document order, including references in
+ // definitions. So [^bar] from the first line is first.
+ assertRendering("[^foo]: foo [^bar]\n" +
+ "\n" +
+ "[^foo]\n" +
+ "\n" +
+ "[^bar]: bar\n", "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testNestedFootnotesUnreferenced() {
+ // This should not result in any footnotes, as foo itself isn't referenced.
+ // But GitHub renders bar only, with a broken backref, because bar is referenced from foo.
+ assertRendering("[^foo]: foo[^bar]\n" +
+ "[^bar]: bar\n", "");
+
+ // And here only 1 is rendered.
+ assertRendering("[^1]\n" +
+ "\n" +
+ "[^1]: one\n" +
+ "[^foo]: foo[^bar]\n" +
+ "[^bar]: bar\n", "
\n" +
+ "\n" +
+ "\n");
+ }
+
+ @Test
+ public void testInlineFootnoteWithReference() {
+ // This is a bit tricky because the IDs need to be unique.
+ assertRenderingInline("Test ^[inline [^1]]\n" +
+ "\n" +
+ "[^1]: normal",
+ "
\n" +
+ "\n" +
+ "\n");
+ }
+
+
+ @Test
+ public void testRenderNodesDirectly() {
+ // Everything should work as expected when rendering from nodes directly (no parsing step).
+ var doc = new Document();
+ var p = new Paragraph();
+ p.appendChild(new Text("Test "));
+ p.appendChild(new FootnoteReference("foo"));
+ var def = new FootnoteDefinition("foo");
+ var note = new Paragraph();
+ note.appendChild(new Text("note!"));
+ def.appendChild(note);
+ doc.appendChild(p);
+ doc.appendChild(def);
+
+ var expected = "
+````````````````````````````````
+
+## Alert content
+
+Marker alone in first paragraph, blank line, then content:
+
+```````````````````````````````` example alert
+> [!NOTE]
+>
+> Content
+.
+
+
Note
+
Content
+
+````````````````````````````````
+
+Multiple paragraphs:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First paragraph
+>
+> Second paragraph
+.
+
+
Note
+
First paragraph
+
Second paragraph
+
+````````````````````````````````
+
+Inline formatting:
+
+```````````````````````````````` example alert
+> [!TIP]
+> This is **bold** and *italic*
+.
+
+````````````````````````````````
+
+List inside alert:
+
+```````````````````````````````` example alert
+> [!IMPORTANT]
+> Items:
+> - First item
+> - Second item
+.
+
+
Important
+
Items:
+
+
First item
+
Second item
+
+
+````````````````````````````````
+
+Links inside alert:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> Check out [this link](https://example.com) for more info
+.
+
+````````````````````````````````
+
+Empty lines in middle of alert:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First
+>
+>
+> After empty lines
+.
+
+
Note
+
First
+
After empty lines
+
+````````````````````````````````
+
+## Not an alert
+
+Text after marker on the same line:
+
+```````````````````````````````` example alert
+> [!NOTE] Some text
+.
+
+
[!NOTE] Some text
+
+````````````````````````````````
+
+Unknown type:
+
+```````````````````````````````` example alert
+> [!INVALID]
+> Some text
+.
+
+
[!INVALID]
+Some text
+
+````````````````````````````````
+
+Unconfigured custom type is not an alert:
+
+```````````````````````````````` example alert
+> [!INFO]
+> Should be blockquote
+.
+
+
[!INFO]
+Should be blockquote
+
+````````````````````````````````
+
+Marker with no content:
+
+```````````````````````````````` example alert
+> [!NOTE]
+.
+
+
[!NOTE]
+
+````````````````````````````````
+
+Whitespace-only content after marker:
+
+```````````````````````````````` example alert
+> [!TIP]
+>
+>
+.
+
+
[!TIP]
+
+````````````````````````````````
+
+Extra space inside marker:
+
+```````````````````````````````` example alert
+> [! NOTE]
+> Should be blockquote
+.
+
+
[! NOTE]
+Should be blockquote
+
+````````````````````````````````
+
+Missing brackets:
+
+```````````````````````````````` example alert
+> !NOTE
+> Should be blockquote
+.
+
+
!NOTE
+Should be blockquote
+
+````````````````````````````````
+
+Missing exclamation mark:
+
+```````````````````````````````` example alert
+> [NOTE]
+> Should be blockquote
+.
+
+
[NOTE]
+Should be blockquote
+
+````````````````````````````````
+
+Regular blockquote is not affected:
+
+```````````````````````````````` example alert
+> This is a regular blockquote
+.
+
+
This is a regular blockquote
+
+````````````````````````````````
+
+## Boundaries
+
+Trailing spaces after marker:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+.
+
+
Note
+
This is a note
+
+````````````````````````````````
+
+Trailing tabs after marker:
+
+```````````````````````````````` example alert
+> [!WARNING]→→
+> Be careful
+.
+
+
Warning
+
Be careful
+
+````````````````````````````````
+
+Leading spaces before blockquote marker:
+
+```````````````````````````````` example alert
+ > [!IMPORTANT]
+ > Content
+.
+
+
Important
+
Content
+
+````````````````````````````````
+
+Blank line after marker ends the blockquote (not an alert):
+
+```````````````````````````````` example alert
+> [!NOTE]
+
+Some text
+.
+
+
[!NOTE]
+
+
Some text
+````````````````````````````````
+
+Alert followed by blockquote:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is an alert
+
+> This is a blockquote
+.
+
+
Note
+
This is an alert
+
+
+
This is a blockquote
+
+````````````````````````````````
+
+Adjacent alerts:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First alert
+
+> [!WARNING]
+> Second alert
+.
+
+
Note
+
First alert
+
+
+
Warning
+
Second alert
+
+````````````````````````````````
+
+## Nesting and containers
+
+Nested alert inside alert renders as blockquote:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+>> [!WARNING]
+>> Nested content
+.
+
+
Note
+
This is a note
+
+
[!WARNING]
+Nested content
+
+
+````````````````````````````````
+
+Nested blockquote inside alert:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+>> Nested blockquote
+.
+
+
Note
+
This is a note
+
+
Nested blockquote
+
+
+````````````````````````````````
+
+Alert inside list item stays as blockquote:
+
+```````````````````````````````` example alert
+- > [!NOTE]
+ > Test
+.
+
+
+
+
[!NOTE]
+Test
+
+
+
+````````````````````````````````
+
+Alert marker in content is treated as text:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> This is a note
+> [!WARNING]
+> This is still part of the note
+.
+
+
Note
+
This is a note
+[!WARNING]
+This is still part of the note
+
+````````````````````````````````
+
+## Continuation and interruption
+
+Lazy continuation:
+
+```````````````````````````````` example alert
+> [!NOTE]
+> First line
+Lazy continuation
+> Continues alert
+.
+
+
Note
+
First line
+Lazy continuation
+Continues alert
+
+````````````````````````````````
+
+Alert type after regular blockquote content is not an alert:
+
+```````````````````````````````` example alert
+> Regular blockquote
+> [!NOTE]
+> More text
+.
+
+
Regular blockquote
+[!NOTE]
+More text
+
+````````````````````````````````
diff --git a/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java b/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java
new file mode 100644
index 000000000..06192f107
--- /dev/null
+++ b/commonmark-ext-gfm-alerts/src/test/resources/generate-alerts-spec.java
@@ -0,0 +1,111 @@
+///usr/bin/env jbang "$0" "$@" ; exit $?
+
+// Generates alerts-spec.txt from alerts-spec-template.md by rendering each example
+// through the GitHub Markdown API and inserting the normalized HTML expectation.
+//
+// Prerequisites: gh CLI installed and authenticated (gh auth login)
+// Usage: cd commonmark-ext-gfm-alerts/src/test/resources && jbang generate-alerts-spec.java
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+class GenerateAlertsSpec {
+
+ private static final String FENCE = "````````````````````````````````";
+ private static final String EXAMPLE_OPEN = FENCE + " example alert";
+
+    /**
+     * Generates {@code alerts-spec.txt} from {@code alerts-spec-template.md} in the current directory.
+     *
+     * Template lines are copied through unchanged. If the very first line is a {@code "# "} heading, a
+     * fixed provenance header is inserted right after it. For every example block (opened by
+     * {@link #EXAMPLE_OPEN}, closed by {@link #FENCE}) the source lines are rendered via
+     * {@link #renderViaGh(String)}, normalized via {@link #normalizeHtml(String)}, and appended after a
+     * {@code "."} separator line, followed by the closing fence.
+     *
+     * @param args ignored
+     * @throws IllegalStateException if an example block is opened but never closed in the template
+     * @throws Exception if reading the template, rendering an example or writing the spec fails
+     */
+    public static void main(String[] args) throws Exception {
+        var templatePath = Path.of("alerts-spec-template.md");
+        if (!Files.exists(templatePath)) {
+            System.err.println("Run from the directory containing alerts-spec-template.md");
+            System.exit(1);
+        }
+
+        List<String> lines = Files.readAllLines(templatePath);
+        List<String> output = new ArrayList<>();
+        var header = "Expectations verified against GitHub Markdown API (gh api markdown -f mode=gfm).\n" +
+                "Our HTML omits GitHub's SVG icons and uses a `data-alert-type` attribute instead.";
+
+        int exampleCount = 0;
+        int i = 0;
+        while (i < lines.size()) {
+            var line = lines.get(i);
+
+            // Insert header after the first heading
+            if (i == 0 && line.startsWith("# ")) {
+                output.add(line);
+                output.add("");
+                output.add(header);
+                i++;
+                continue;
+            }
+
+            if (!line.equals(EXAMPLE_OPEN)) {
+                output.add(line);
+                i++;
+                continue;
+            }
+
+            // Collect source lines until closing fence
+            output.add(line);
+            i++;
+            List<String> sourceLines = new ArrayList<>();
+            while (i < lines.size() && !lines.get(i).equals(FENCE)) {
+                sourceLines.add(lines.get(i));
+                output.add(lines.get(i));
+                i++;
+            }
+            if (i >= lines.size()) {
+                // Fail before calling the API: a missing closing fence means the template is malformed and
+                // the rest of the file would otherwise be swallowed as one example.
+                throw new IllegalStateException(
+                        "Unterminated example block in " + templatePath + " (missing closing fence)");
+            }
+
+            // Render via GitHub API (→ represents tabs in the spec format)
+            var source = String.join("\n", sourceLines).replace("\u2192", "\t");
+            exampleCount++;
+            System.out.printf("%d: %s%n", exampleCount,
+                    source.substring(0, Math.min(40, source.length())).replace("\n", "\\n"));
+
+            var ghHtml = normalizeHtml(renderViaGh(source));
+
+            // Insert separator and HTML expectation
+            output.add(".");
+            output.add(ghHtml);
+            output.add(FENCE);
+            i++; // skip closing fence from template
+        }
+
+        var specPath = Path.of("alerts-spec.txt");
+        Files.writeString(specPath, String.join("\n", output) + "\n");
+        System.out.println("Done — " + exampleCount + " examples written to alerts-spec.txt");
+    }
+
+    /**
+     * Renders Markdown to HTML by running {@code gh api markdown -f mode=gfm -f text=...}.
+     *
+     * stderr is merged into stdout, so on failure the exception message carries whatever {@code gh}
+     * printed.
+     *
+     * @param markdown the Markdown source to render
+     * @return the combined stdout/stderr of the {@code gh} process, decoded as UTF-8
+     * @throws RuntimeException if {@code gh} exits with a non-zero status
+     * @throws Exception if the process cannot be started, read, or waited for
+     */
+    static String renderViaGh(String markdown) throws Exception {
+        var process = new ProcessBuilder("gh", "api", "markdown", "-f", "mode=gfm", "-f", "text=" + markdown)
+                .redirectErrorStream(true)
+                .start();
+        String output;
+        try (var stdout = process.getInputStream()) {
+            // Decode explicitly as UTF-8 instead of the platform default charset, so non-ASCII content
+            // survives on JVMs where the default is not UTF-8.
+            output = new String(stdout.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
+        }
+        if (process.waitFor() != 0) {
+            throw new RuntimeException("gh api failed: " + output);
+        }
+        return output;
+    }
+
+ // Normalize GitHub API HTML to match our renderer output.
+ static String normalizeHtml(String html) {
+ // Strip GitHub-specific elements and attributes
+ html = Pattern.compile("", Pattern.DOTALL).matcher(html).replaceAll("");
+ html = html.replaceAll(" (dir=\"auto\"|rel=\"nofollow\"|class=\"notranslate\")", "");
+ // Add data-alert-type and insert newlines to match our renderer's formatting
+ html = Pattern.compile("class=\"markdown-alert markdown-alert-(\\w+)\"")
+ .matcher(html)
+ .replaceAll("class=\"markdown-alert markdown-alert-$1\" data-alert-type=\"$1\"");
+ html = Pattern.compile("(data-alert-type=\"\\w+\">)(
", "
\n
");
+ return html.replace("\r\n", "\n").lines()
+ .map(String::stripTrailing)
+ .reduce((a, b) -> a + "\n" + b)
+ .orElse("")
+ .strip();
+ }
+}
\ No newline at end of file
diff --git a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs
deleted file mode 100644
index 5a0ad22d2..000000000
--- a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.core.runtime.prefs
+++ /dev/null
@@ -1,2 +0,0 @@
-eclipse.preferences.version=1
-line.separator=\n
diff --git a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 3c0d27c8f..000000000
--- a/commonmark-ext-gfm-strikethrough/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,290 +0,0 @@
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
-org.eclipse.jdt.core.compiler.compliance=1.7
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.7
-org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_assignment=0
-org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
-org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
-org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
-org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
-org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
-org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16
-org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_after_package=1
-org.eclipse.jdt.core.formatter.blank_lines_before_field=0
-org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
-org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
-org.eclipse.jdt.core.formatter.blank_lines_before_method=1
-org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
-org.eclipse.jdt.core.formatter.blank_lines_before_package=0
-org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
-org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
-org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
-org.eclipse.jdt.core.formatter.comment.format_block_comments=true
-org.eclipse.jdt.core.formatter.comment.format_header=false
-org.eclipse.jdt.core.formatter.comment.format_html=true
-org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
-org.eclipse.jdt.core.formatter.comment.format_line_comments=true
-org.eclipse.jdt.core.formatter.comment.format_source_code=true
-org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
-org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
-org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
-org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
-org.eclipse.jdt.core.formatter.comment.line_length=120
-org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
-org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
-org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false
-org.eclipse.jdt.core.formatter.compact_else_if=true
-org.eclipse.jdt.core.formatter.continuation_indentation=2
-org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
-org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off
-org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on
-org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
-org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
-org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_empty_lines=false
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
-org.eclipse.jdt.core.formatter.indentation.size=4
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
-org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert
-org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
-org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.join_lines_in_comments=true
-org.eclipse.jdt.core.formatter.join_wrapped_lines=false
-org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.lineSplit=120
-org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
-org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
-org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
-org.eclipse.jdt.core.formatter.tabulation.char=space
-org.eclipse.jdt.core.formatter.tabulation.size=4
-org.eclipse.jdt.core.formatter.use_on_off_tags=false
-org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
-org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
-org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true
-org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true
-org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter
diff --git a/commonmark-ext-gfm-strikethrough/pom.xml b/commonmark-ext-gfm-strikethrough/pom.xml
index 3df0581e6..9d8f55e5f 100644
--- a/commonmark-ext-gfm-strikethrough/pom.xml
+++ b/commonmark-ext-gfm-strikethrough/pom.xml
@@ -2,9 +2,9 @@
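<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>com.atlassian.commonmark</groupId>
+ <groupId>org.commonmark</groupId>
<artifactId>commonmark-parent</artifactId>
- <version>0.14.1-SNAPSHOT</version>
+ <version>0.28.1-SNAPSHOT</version>
</parent>
<artifactId>commonmark-ext-gfm-strikethrough</artifactId>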
@@ -13,31 +13,15 @@
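<dependencies>
<dependency>
- <groupId>com.atlassian.commonmark</groupId>
+ <groupId>org.commonmark</groupId>
<artifactId>commonmark</artifactId>
</dependency>
<dependency>
- <groupId>com.atlassian.commonmark</groupId>
+ <groupId>org.commonmark</groupId>
<artifactId>commonmark-test-util</artifactId>
<scope>test</scope>
</dependency>
</dependencies>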
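- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <archive>
- <manifestEntries>
- <Automatic-Module-Name>org.commonmark.ext.gfm.strikethrough</Automatic-Module-Name>
- </manifestEntries>
- </archive>
- </configuration>
- </plugin>
- </plugins>
- </build>
-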
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java b/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java
new file mode 100644
index 000000000..b6204934b
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+module org.commonmark.ext.gfm.strikethrough {
+ exports org.commonmark.ext.gfm.strikethrough; // only this package is exported; the .internal package is not
+ // "transitive": any module that requires this one can also read org.commonmark without declaring it itself.
+ requires transitive org.commonmark;
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java
index 115ae9ea4..0c24642bc 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java
@@ -4,19 +4,23 @@
import org.commonmark.node.Delimited;
/**
- * A strikethrough node containing text and other inline nodes nodes as children.
+ * A strikethrough node containing text and other inline nodes as children.
*/
public class Strikethrough extends CustomNode implements Delimited {
- private static final String DELIMITER = "~~";
+ private String delimiter;
+
+ public Strikethrough(String delimiter) {
+ this.delimiter = delimiter;
+ }
@Override
public String getOpeningDelimiter() {
- return DELIMITER;
+ return delimiter;
}
@Override
public String getClosingDelimiter() {
- return DELIMITER;
+ return delimiter;
}
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java
index 3d0839f11..364205aed 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java
@@ -1,42 +1,78 @@
package org.commonmark.ext.gfm.strikethrough;
import org.commonmark.Extension;
-import org.commonmark.renderer.text.TextContentRenderer;
-import org.commonmark.renderer.text.TextContentNodeRendererContext;
-import org.commonmark.renderer.text.TextContentNodeRendererFactory;
import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughDelimiterProcessor;
import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughHtmlNodeRenderer;
+import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughMarkdownNodeRenderer;
import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughTextContentNodeRenderer;
-import org.commonmark.renderer.html.HtmlRenderer;
-import org.commonmark.renderer.html.HtmlNodeRendererContext;
-import org.commonmark.renderer.html.HtmlNodeRendererFactory;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlNodeRendererFactory;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.commonmark.renderer.text.TextContentNodeRendererContext;
+import org.commonmark.renderer.text.TextContentNodeRendererFactory;
+import org.commonmark.renderer.text.TextContentRenderer;
+
+import java.util.Set;
/**
- * Extension for GFM strikethrough using ~~ (GitHub Flavored Markdown).
+ * Extension for GFM strikethrough using {@code ~} or {@code ~~} (GitHub Flavored Markdown).
+ * <p>Example input:</p>
+ * <pre>{@code ~foo~ or ~~bar~~}</pre>
+ * <p>Example output (HTML):</p>
+ * <pre>{@code <del>foo</del> or <del>bar</del>}</pre>
*
- * Create it with {@link #create()} and then configure it on the builders
+ * Create the extension with {@link #create()} and then add it to the parser and renderer builders
* ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)},
* {@link HtmlRenderer.Builder#extensions(Iterable)}).
*
*
* The parsed strikethrough text regions are turned into {@link Strikethrough} nodes.
*
+ *
+ * If you have another extension that only uses a single tilde ({@code ~}) syntax, you will have to configure this
+ * {@link StrikethroughExtension} to only accept the double tilde syntax, like this:
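+ * <pre>
+ * {@code
+ * StrikethroughExtension.builder().requireTwoTildes(true).build();
+ * }
+ * </pre>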
+ * If you don't do that, there's a conflict between the two extensions and you will get an
+ * {@link IllegalArgumentException} when constructing the parser.
+ *
*/
public class StrikethroughExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension,
- TextContentRenderer.TextContentRendererExtension {
+ TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension {
- private StrikethroughExtension() {
+ private final boolean requireTwoTildes;
+
+ private StrikethroughExtension(Builder builder) {
+ this.requireTwoTildes = builder.requireTwoTildes;
}
+ /**
+ * @return the extension with default options
+ */
public static Extension create() {
- return new StrikethroughExtension();
+ return builder().build();
+ }
+
+ /**
+ * @return a builder to configure the behavior of the extension
+ */
+ public static Builder builder() {
+ return new Builder();
}
@Override
public void extend(Parser.Builder parserBuilder) {
- parserBuilder.customDelimiterProcessor(new StrikethroughDelimiterProcessor());
+ parserBuilder.customDelimiterProcessor(new StrikethroughDelimiterProcessor(requireTwoTildes));
}
@Override
@@ -58,4 +94,41 @@ public NodeRenderer create(TextContentNodeRendererContext context) {
}
});
}
+
+ @Override
+ public void extend(MarkdownRenderer.Builder rendererBuilder) { // registers Markdown output support for Strikethrough nodes
+ rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(MarkdownNodeRendererContext context) {
+ return new StrikethroughMarkdownNodeRenderer(context);
+ }
+ // '~' is reported as special; presumably this makes the Markdown renderer escape literal tildes in text (TODO confirm).
+ @Override
+ public Set<Character> getSpecialCharacters() {
+ return Set.of('~');
+ }
+ });
+ }
+
+ public static class Builder {
+ // Collects the options for a StrikethroughExtension; build() hands this builder to the extension's constructor.
+ private boolean requireTwoTildes = false;
+
+ /**
+ * @param requireTwoTildes Whether two tilde characters ({@code ~~}) are required for strikethrough or whether
+ * one is also enough. Default is {@code false}; both a single tilde and two tildes can be used for strikethrough.
+ * @return {@code this}
+ */
+ public Builder requireTwoTildes(boolean requireTwoTildes) {
+ this.requireTwoTildes = requireTwoTildes;
+ return this;
+ }
+
+ /**
+ * @return a configured extension
+ */
+ public Extension build() {
+ return new StrikethroughExtension(this);
+ }
+ }
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java
index dd881b419..4657106ab 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java
@@ -2,12 +2,24 @@
import org.commonmark.ext.gfm.strikethrough.Strikethrough;
import org.commonmark.node.Node;
+import org.commonmark.node.Nodes;
+import org.commonmark.node.SourceSpans;
import org.commonmark.node.Text;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import org.commonmark.parser.delimiter.DelimiterRun;
public class StrikethroughDelimiterProcessor implements DelimiterProcessor {
+ private final boolean requireTwoTildes;
+
+ public StrikethroughDelimiterProcessor() {
+ this(false); // default: two tildes are not required
+ }
+
+ public StrikethroughDelimiterProcessor(boolean requireTwoTildes) {
+ this.requireTwoTildes = requireTwoTildes; // read by getMinLength(): 2 when true, otherwise 1
+ }
+
@Override
public char getOpeningCharacter() {
return '~';
@@ -20,31 +32,36 @@ public char getClosingCharacter() {
@Override
public int getMinLength() {
- return 2;
+ return requireTwoTildes ? 2 : 1;
}
@Override
- public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) {
- if (opener.length() >= 2 && closer.length() >= 2) {
- // Use exactly two delimiters even if we have more, and don't care about internal openers/closers.
- return 2;
+ public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
+ if (openingRun.length() == closingRun.length() && openingRun.length() <= 2) {
+ // GitHub only accepts either one or two delimiters, but not a mix or more than that.
+
+ Text opener = openingRun.getOpener();
+
+ // Wrap nodes between delimiters in strikethrough.
+ String delimiter = openingRun.length() == 1 ? opener.getLiteral() : opener.getLiteral() + opener.getLiteral();
+ Node strikethrough = new Strikethrough(delimiter);
+
+ SourceSpans sourceSpans = new SourceSpans();
+ sourceSpans.addAllFrom(openingRun.getOpeners(openingRun.length()));
+
+ for (Node node : Nodes.between(opener, closingRun.getCloser())) {
+ strikethrough.appendChild(node);
+ sourceSpans.addAll(node.getSourceSpans());
+ }
+
+ sourceSpans.addAllFrom(closingRun.getClosers(closingRun.length()));
+ strikethrough.setSourceSpans(sourceSpans.getSourceSpans());
+
+ opener.insertAfter(strikethrough);
+
+ return openingRun.length();
} else {
return 0;
}
}
-
- @Override
- public void process(Text opener, Text closer, int delimiterCount) {
- // Wrap nodes between delimiters in strikethrough.
- Node strikethrough = new Strikethrough();
-
- Node tmp = opener.getNext();
- while (tmp != null && tmp != closer) {
- Node next = tmp.getNext();
- strikethrough.appendChild(tmp);
- tmp = next;
- }
-
- opener.insertAfter(strikethrough);
- }
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java
index 4dd0de39b..b1a82cb03 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughHtmlNodeRenderer.java
@@ -1,10 +1,9 @@
package org.commonmark.ext.gfm.strikethrough.internal;
-import org.commonmark.renderer.html.HtmlWriter;
-import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.node.Node;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlWriter;
-import java.util.Collections;
import java.util.Map;
public class StrikethroughHtmlNodeRenderer extends StrikethroughNodeRenderer {
@@ -19,7 +18,7 @@ public StrikethroughHtmlNodeRenderer(HtmlNodeRendererContext context) {
@Override
public void render(Node node) {
- Map attributes = context.extendAttributes(node, "del", Collections.emptyMap());
+ Map attributes = context.extendAttributes(node, "del", Map.of());
html.tag("del", attributes);
renderChildren(node);
html.tag("/del");
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java
new file mode 100644
index 000000000..1c91dd64f
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java
@@ -0,0 +1,34 @@
+package org.commonmark.ext.gfm.strikethrough.internal;
+
+import org.commonmark.ext.gfm.strikethrough.Strikethrough;
+import org.commonmark.node.Node;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+
+public class StrikethroughMarkdownNodeRenderer extends StrikethroughNodeRenderer {
+ // Renders Strikethrough nodes back to Markdown by re-emitting the delimiter stored on the node around its children.
+ private final MarkdownNodeRendererContext context;
+ private final MarkdownWriter writer;
+
+ public StrikethroughMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+ this.context = context;
+ this.writer = context.getWriter();
+ }
+ /** Writes the node's opening delimiter, then its children, then its closing delimiter. */
+ @Override
+ public void render(Node node) {
+ Strikethrough strikethrough = (Strikethrough) node;
+ writer.raw(strikethrough.getOpeningDelimiter()); // NOTE(review): raw() presumably bypasses escaping so '~' is emitted verbatim; confirm
+ renderChildren(node);
+ writer.raw(strikethrough.getClosingDelimiter());
+ }
+ /** Renders every direct child of {@code parent}, in order, through the renderer context. */
+ private void renderChildren(Node parent) {
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext(); // fetched before rendering; presumably so iteration survives if rendering relinks the node
+ context.render(node);
+ node = next;
+ }
+ }
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java
index 4f3a12618..18ed21887 100644
--- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java
+++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughNodeRenderer.java
@@ -4,13 +4,12 @@
import org.commonmark.node.Node;
import org.commonmark.renderer.NodeRenderer;
-import java.util.Collections;
import java.util.Set;
abstract class StrikethroughNodeRenderer implements NodeRenderer {
@Override
public Set<Class<? extends Node>> getNodeTypes() {
- return Collections.<Class<? extends Node>>singleton(Strikethrough.class);
+ return Set.of(Strikethrough.class);
}
}
diff --git a/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java
new file mode 100644
index 000000000..c497a4db3
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java
@@ -0,0 +1,35 @@
+package org.commonmark.ext.gfm.strikethrough;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class StrikethroughMarkdownRendererTest {
+ // Round-trip tests: parse Markdown with the strikethrough extension, render it back to Markdown, expect the input.
+ private static final Set<Extension> EXTENSIONS = Set.of(StrikethroughExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void testStrikethrough() {
+ assertRoundTrip("~foo~ ~bar~\n");
+ assertRoundTrip("~~foo~~ ~~bar~~\n");
+ assertRoundTrip("~~f\\~oo~~ ~~bar~~\n");
+ // Escaped tildes are not strikethrough delimiters and must come back still escaped.
+ assertRoundTrip("\\~foo\\~\n");
+ }
+
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+
+ private void assertRoundTrip(String input) {
+ String rendered = render(input);
+ assertThat(rendered).isEqualTo(input);
+ }
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java
new file mode 100644
index 000000000..f1199b521
--- /dev/null
+++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughSpecTest.java
@@ -0,0 +1,42 @@
+package org.commonmark.ext.gfm.strikethrough;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.testutil.RenderingTestCase;
+import org.commonmark.testutil.TestResources;
+import org.commonmark.testutil.example.Example;
+import org.commonmark.testutil.example.ExampleReader;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.Parameter;
+import org.junit.jupiter.params.ParameterizedClass;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.List;
+import java.util.Set;
+
+@ParameterizedClass
+@MethodSource("data")
+public class StrikethroughSpecTest extends RenderingTestCase {
+ // Runs each "strikethrough" example read from the GFM spec resource through the parser and HTML renderer.
+ private static final Set<Extension> EXTENSIONS = Set.of(StrikethroughExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Parameter
+ Example example;
+
+ static List<Example> data() {
+ return ExampleReader.readExamples(TestResources.getGfmSpec(), "strikethrough");
+ }
+
+ @Test
+ public void testHtmlRendering() {
+ assertRendering(example.getSource(), example.getHtml());
+ }
+
+ @Override
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+}
diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java
index 225977854..c29391cdd 100644
--- a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java
+++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughTest.java
@@ -2,32 +2,38 @@
import org.commonmark.Extension;
import org.commonmark.node.Node;
+import org.commonmark.node.Paragraph;
+import org.commonmark.node.SourceSpan;
+import org.commonmark.node.Text;
+import org.commonmark.parser.IncludeSourceSpans;
import org.commonmark.parser.Parser;
+import org.commonmark.parser.delimiter.DelimiterProcessor;
+import org.commonmark.parser.delimiter.DelimiterRun;
import org.commonmark.renderer.html.HtmlRenderer;
import org.commonmark.renderer.text.TextContentRenderer;
import org.commonmark.testutil.RenderingTestCase;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.util.Collections;
+import java.util.List;
import java.util.Set;
-import static org.junit.Assert.assertEquals;
+import static org.assertj.core.api.Assertions.assertThat;
public class StrikethroughTest extends RenderingTestCase {
- private static final Set EXTENSIONS = Collections.singleton(StrikethroughExtension.create());
+ private static final Set EXTENSIONS = Set.of(StrikethroughExtension.create());
private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
private static final HtmlRenderer HTML_RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
private static final TextContentRenderer CONTENT_RENDERER = TextContentRenderer.builder()
.extensions(EXTENSIONS).build();
@Test
- public void oneTildeIsNotEnough() {
- assertRendering("~foo~", "<p>~foo~</p>\n");
+ public void oneTildeIsEnough() {
+ assertRendering("~foo~", "<p><del>foo</del></p>\n");
}
@Test
- public void twoTildesYay() {
+ public void twoTildesWorksToo() {
assertRendering("~~foo~~", "<p><del>foo</del></p>\n");
}
@@ -44,23 +50,22 @@ public void unmatched() {
@Test
public void threeInnerThree() {
- assertRendering("a ~~~foo~~~", "<p>a ~<del>foo</del>~</p>\n");
+ assertRendering("a ~~~foo~~~", "<p>a ~~~foo~~~</p>\n");
}
@Test
public void twoInnerThree() {
- assertRendering("~~foo~~~", "<p><del>foo</del>~</p>\n");
+ assertRendering("~~foo~~~", "<p>~~foo~~~</p>\n");
}
@Test
public void tildesInside() {
assertRendering("~~foo~bar~~", "<p><del>foo~bar</del></p>\n");
assertRendering("~~foo~~bar~~", "<p><del>foo</del>bar~~</p>\n");
- assertRendering("~~foo~~~bar~~", "<p><del>foo</del>~bar~~</p>\n");
- assertRendering("~~foo~~~~bar~~", "<p><del>foo</del><del>bar</del></p>\n");
- assertRendering("~~foo~~~~~bar~~", "<p><del>foo</del>~<del>bar</del></p>\n");
- assertRendering("~~foo~~~~~~bar~~", "<p><del>foo</del>~~<del>bar</del></p>\n");
- assertRendering("~~foo~~~~~~~bar~~", "<p><del>foo</del>~~~<del>bar</del></p>\n");
+ assertRendering("~~foo~~~bar~~", "<p><del>foo~~~bar</del></p>\n");
+ assertRendering("~~foo~~~~bar~~", "<p><del>foo~~~~bar</del></p>\n");
+ assertRendering("~~foo~~~~~bar~~", "<p><del>foo~~~~~bar</del></p>\n");
+ assertRendering("~~foo~~~~~~bar~~", "<p><del>foo~~~~~~bar</del></p>\n");
}
@Test
@@ -79,18 +84,69 @@ public void insideBlockQuote() {
public void delimited() {
Node document = PARSER.parse("~~foo~~");
Strikethrough strikethrough = (Strikethrough) document.getFirstChild().getFirstChild();
- assertEquals("~~", strikethrough.getOpeningDelimiter());
- assertEquals("~~", strikethrough.getClosingDelimiter());
+ assertThat(strikethrough.getOpeningDelimiter()).isEqualTo("~~");
+ assertThat(strikethrough.getClosingDelimiter()).isEqualTo("~~");
}
@Test
public void textContentRenderer() {
Node document = PARSER.parse("~~foo~~");
- assertEquals("/foo/", CONTENT_RENDERER.render(document));
+ assertThat(CONTENT_RENDERER.render(document)).isEqualTo("/foo/");
+ }
+
+ @Test
+ public void requireTwoTildesOption() {
+ Parser parser = Parser.builder()
+ .extensions(Set.of(StrikethroughExtension.builder()
+ .requireTwoTildes(true) // strikethrough needs "~~", so it can coexist with the single-tilde processor below
+ .build()))
+ .customDelimiterProcessor(new SubscriptDelimiterProcessor())
+ .build();
+ // Expect the single-tilde run to be handled by the subscript processor and the double-tilde run by strikethrough.
+ Node document = parser.parse("~foo~ ~~bar~~");
+ assertThat(CONTENT_RENDERER.render(document)).isEqualTo("(sub)foo(/sub) /bar/");
+ }
+
+ @Test
+ public void sourceSpans() {
+ Parser parser = Parser.builder()
+ .extensions(EXTENSIONS)
+ .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES)
+ .build();
+ // "~~there~~" starts at offset 4 of the first line and is 9 characters long, i.e. the span includes the delimiters.
+ Node document = parser.parse("hey ~~there~~\n");
+ Paragraph block = (Paragraph) document.getFirstChild();
+ Node strikethrough = block.getLastChild();
+ assertThat(strikethrough.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 9)));
+ }
@Override
protected String render(String source) {
return HTML_RENDERER.render(PARSER.parse(source));
}
+
+ private static class SubscriptDelimiterProcessor implements DelimiterProcessor {
+ // Test-only processor for '~' runs of length >= 1; it marks a match with literal "(sub)" / "(/sub)" text nodes.
+ @Override
+ public char getOpeningCharacter() {
+ return '~';
+ }
+
+ @Override
+ public char getClosingCharacter() {
+ return '~';
+ }
+
+ @Override
+ public int getMinLength() {
+ return 1;
+ }
+
+ @Override
+ public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
+ openingRun.getOpener().insertAfter(new Text("(sub)"));
+ closingRun.getCloser().insertBefore(new Text("(/sub)"));
+ return 1; // presumably the number of delimiter characters used on each side; confirm against DelimiterProcessor
+ }
+ }
}
diff --git a/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs b/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs
deleted file mode 100644
index 5a0ad22d2..000000000
--- a/commonmark-ext-gfm-tables/.settings/org.eclipse.core.runtime.prefs
+++ /dev/null
@@ -1,2 +0,0 @@
-eclipse.preferences.version=1
-line.separator=\n
diff --git a/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs b/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs
deleted file mode 100644
index 3c0d27c8f..000000000
--- a/commonmark-ext-gfm-tables/.settings/org.eclipse.jdt.core.prefs
+++ /dev/null
@@ -1,290 +0,0 @@
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
-org.eclipse.jdt.core.compiler.compliance=1.7
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.source=1.7
-org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_assignment=0
-org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
-org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
-org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
-org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
-org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
-org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
-org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
-org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
-org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16
-org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_after_package=1
-org.eclipse.jdt.core.formatter.blank_lines_before_field=0
-org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
-org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
-org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
-org.eclipse.jdt.core.formatter.blank_lines_before_method=1
-org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
-org.eclipse.jdt.core.formatter.blank_lines_before_package=0
-org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
-org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
-org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
-org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
-org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
-org.eclipse.jdt.core.formatter.comment.format_block_comments=true
-org.eclipse.jdt.core.formatter.comment.format_header=false
-org.eclipse.jdt.core.formatter.comment.format_html=true
-org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
-org.eclipse.jdt.core.formatter.comment.format_line_comments=true
-org.eclipse.jdt.core.formatter.comment.format_source_code=true
-org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
-org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
-org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
-org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert
-org.eclipse.jdt.core.formatter.comment.line_length=120
-org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
-org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
-org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false
-org.eclipse.jdt.core.formatter.compact_else_if=true
-org.eclipse.jdt.core.formatter.continuation_indentation=2
-org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
-org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off
-org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on
-org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
-org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
-org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
-org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_empty_lines=false
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
-org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
-org.eclipse.jdt.core.formatter.indentation.size=4
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
-org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
-org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
-org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
-org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert
-org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
-org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
-org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
-org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
-org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert
-org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
-org.eclipse.jdt.core.formatter.join_lines_in_comments=true
-org.eclipse.jdt.core.formatter.join_wrapped_lines=false
-org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
-org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
-org.eclipse.jdt.core.formatter.lineSplit=120
-org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
-org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
-org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
-org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
-org.eclipse.jdt.core.formatter.tabulation.char=space
-org.eclipse.jdt.core.formatter.tabulation.size=4
-org.eclipse.jdt.core.formatter.use_on_off_tags=false
-org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
-org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
-org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true
-org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true
-org.eclipse.jdt.core.javaFormatter=org.eclipse.jdt.core.defaultJavaFormatter
diff --git a/commonmark-ext-gfm-tables/pom.xml b/commonmark-ext-gfm-tables/pom.xml
index ad69f1795..5bd323168 100644
--- a/commonmark-ext-gfm-tables/pom.xml
+++ b/commonmark-ext-gfm-tables/pom.xml
@@ -2,9 +2,9 @@
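<modelVersion>4.0.0</modelVersion>
- <groupId>com.atlassian.commonmark</groupId>
+ <groupId>org.commonmark</groupId>
<artifactId>commonmark-parent</artifactId>
- <version>0.14.1-SNAPSHOT</version>
+ <version>0.28.1-SNAPSHOT</version>
<artifactId>commonmark-ext-gfm-tables</artifactId>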
@@ -13,31 +13,15 @@
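- <groupId>com.atlassian.commonmark</groupId>
+ <groupId>org.commonmark</groupId>
<artifactId>commonmark</artifactId>
- <groupId>com.atlassian.commonmark</groupId>
+ <groupId>org.commonmark</groupId>
<artifactId>commonmark-test-util</artifactId>
<scope>test</scope>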
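- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- <configuration>
- <archive>
- <manifestEntries>
- <Automatic-Module-Name>org.commonmark.ext.gfm.tables</Automatic-Module-Name>
- </manifestEntries>
- </archive>
- </configuration>
- </plugin>
- </plugins>
- </build>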
-
diff --git a/commonmark-ext-gfm-tables/src/main/java/module-info.java b/commonmark-ext-gfm-tables/src/main/java/module-info.java
new file mode 100644
index 000000000..7e6d2629c
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+module org.commonmark.ext.gfm.tables {
+ exports org.commonmark.ext.gfm.tables;
+
+ requires transitive org.commonmark;
+}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java
index 61880c6c3..033c2dd04 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java
@@ -9,6 +9,7 @@ public class TableCell extends CustomNode {
private boolean header;
private Alignment alignment;
+ private int width;
/**
* @return whether the cell is a header or not
@@ -22,7 +23,7 @@ public void setHeader(boolean header) {
}
/**
- * @return the cell alignment
+ * @return the cell alignment or {@code null} if no specific alignment
*/
public Alignment getAlignment() {
return alignment;
@@ -32,6 +33,17 @@ public void setAlignment(Alignment alignment) {
this.alignment = alignment;
}
+ /**
+ * @return the cell width (the number of dash and colon characters in the delimiter row of the table for this column)
+ */
+ public int getWidth() {
+ return width;
+ }
+
+ public void setWidth(int width) {
+ this.width = width;
+ }
+
/**
* How the cell is aligned horizontally.
*/
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java
index 5707b0f14..f754b8276 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java
@@ -3,16 +3,22 @@
import org.commonmark.Extension;
import org.commonmark.ext.gfm.tables.internal.TableBlockParser;
import org.commonmark.ext.gfm.tables.internal.TableHtmlNodeRenderer;
+import org.commonmark.ext.gfm.tables.internal.TableMarkdownNodeRenderer;
import org.commonmark.ext.gfm.tables.internal.TableTextContentNodeRenderer;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.NodeRenderer;
import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.renderer.html.HtmlNodeRendererFactory;
import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
import org.commonmark.renderer.text.TextContentNodeRendererContext;
import org.commonmark.renderer.text.TextContentNodeRendererFactory;
import org.commonmark.renderer.text.TextContentRenderer;
+import java.util.Set;
+
/**
* Extension for GFM tables using "|" pipes (GitHub Flavored Markdown).
*
@@ -27,7 +33,7 @@
* @see <a href="https://github.github.com/gfm/#tables-extension-">Tables (extension) in GitHub Flavored Markdown Spec</a>
*/
public class TablesExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension,
- TextContentRenderer.TextContentRendererExtension {
+ TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension {
private TablesExtension() {
}
@@ -60,4 +66,19 @@ public NodeRenderer create(TextContentNodeRendererContext context) {
}
});
}
+
+ @Override
+ public void extend(MarkdownRenderer.Builder rendererBuilder) {
+ rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(MarkdownNodeRendererContext context) {
+ return new TableMarkdownNodeRenderer(context);
+ }
+
+ @Override
+ public Set<Character> getSpecialCharacters() {
+ return Set.of('|');
+ }
+ });
+ }
}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java
index 112764ba0..57af128d8 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java
@@ -3,8 +3,12 @@
import org.commonmark.ext.gfm.tables.*;
import org.commonmark.node.Block;
import org.commonmark.node.Node;
+import org.commonmark.node.SourceSpan;
import org.commonmark.parser.InlineParser;
+import org.commonmark.parser.SourceLine;
+import org.commonmark.parser.SourceLines;
import org.commonmark.parser.block.*;
+import org.commonmark.text.Characters;
import java.util.ArrayList;
import java.util.List;
@@ -12,20 +16,19 @@
public class TableBlockParser extends AbstractBlockParser {
private final TableBlock block = new TableBlock();
- private final List<CharSequence> bodyLines = new ArrayList<>();
- private final List columns;
- private final List<String> headerCells;
+ private final List<SourceLine> rowLines = new ArrayList<>();
+ private final List columns;
- private boolean nextIsSeparatorLine = true;
+ private boolean canHaveLazyContinuationLines = true;
- private TableBlockParser(List columns, List headerCells) {
+ private TableBlockParser(List columns, SourceLine headerLine) {
this.columns = columns;
- this.headerCells = headerCells;
+ this.rowLines.add(headerLine);
}
@Override
public boolean canHaveLazyContinuationLines() {
- return true;
+ return canHaveLazyContinuationLines;
}
@Override
@@ -35,7 +38,17 @@ public Block getBlock() {
@Override
public BlockContinue tryContinue(ParserState state) {
- if (state.getLine().toString().contains("|")) {
+ CharSequence content = state.getLine().getContent();
+ int pipe = Characters.find('|', content, state.getNextNonSpaceIndex());
+ if (pipe != -1) {
+ if (pipe == state.getNextNonSpaceIndex()) {
+ // If we *only* have a pipe character (and whitespace), that is not a valid table row and ends the table.
+ if (Characters.skipSpaceTab(content, pipe + 1, content.length()) == content.length()) {
+ // We also don't want the pipe to be added via lazy continuation.
+ canHaveLazyContinuationLines = false;
+ return BlockContinue.none();
+ }
+ }
return BlockContinue.atIndex(state.getIndex());
} else {
return BlockContinue.none();
@@ -43,38 +56,48 @@ public BlockContinue tryContinue(ParserState state) {
}
@Override
- public void addLine(CharSequence line) {
- if (nextIsSeparatorLine) {
- nextIsSeparatorLine = false;
- } else {
- bodyLines.add(line);
- }
+ public void addLine(SourceLine line) {
+ rowLines.add(line);
}
@Override
public void parseInlines(InlineParser inlineParser) {
- int headerColumns = headerCells.size();
+ List sourceSpans = block.getSourceSpans();
+ SourceSpan headerSourceSpan = !sourceSpans.isEmpty() ? sourceSpans.get(0) : null;
Node head = new TableHead();
+ if (headerSourceSpan != null) {
+ head.addSourceSpan(headerSourceSpan);
+ }
block.appendChild(head);
TableRow headerRow = new TableRow();
+ headerRow.setSourceSpans(head.getSourceSpans());
head.appendChild(headerRow);
+
+ List headerCells = split(rowLines.get(0));
+ int headerColumns = headerCells.size();
for (int i = 0; i < headerColumns; i++) {
- String cell = headerCells.get(i);
+ SourceLine cell = headerCells.get(i);
TableCell tableCell = parseCell(cell, i, inlineParser);
tableCell.setHeader(true);
headerRow.appendChild(tableCell);
}
- Node body = null;
- for (CharSequence rowLine : bodyLines) {
- List cells = split(rowLine);
+ TableBody body = null;
+ // Body starts at index 2. 0 is header, 1 is separator.
+ for (int rowIndex = 2; rowIndex < rowLines.size(); rowIndex++) {
+ SourceLine rowLine = rowLines.get(rowIndex);
+ SourceSpan sourceSpan = rowIndex < sourceSpans.size() ? sourceSpans.get(rowIndex) : null;
+ List cells = split(rowLine);
TableRow row = new TableRow();
+ if (sourceSpan != null) {
+ row.addSourceSpan(sourceSpan);
+ }
// Body can not have more columns than head
for (int i = 0; i < headerColumns; i++) {
- String cell = i < cells.size() ? cells.get(i) : "";
+ SourceLine cell = i < cells.size() ? cells.get(i) : SourceLine.of("", null);
TableCell tableCell = parseCell(cell, i, inlineParser);
row.appendChild(tableCell);
}
@@ -85,33 +108,50 @@ public void parseInlines(InlineParser inlineParser) {
block.appendChild(body);
}
body.appendChild(row);
+ body.addSourceSpan(sourceSpan);
}
}
- private TableCell parseCell(String cell, int column, InlineParser inlineParser) {
+ private TableCell parseCell(SourceLine cell, int column, InlineParser inlineParser) {
TableCell tableCell = new TableCell();
+ SourceSpan sourceSpan = cell.getSourceSpan();
+ if (sourceSpan != null) {
+ tableCell.addSourceSpan(sourceSpan);
+ }
if (column < columns.size()) {
- tableCell.setAlignment(columns.get(column));
+ TableCellInfo cellInfo = columns.get(column);
+ tableCell.setAlignment(cellInfo.getAlignment());
+ tableCell.setWidth(cellInfo.getWidth());
}
- inlineParser.parse(cell.trim(), tableCell);
+ CharSequence content = cell.getContent();
+ int start = Characters.skipSpaceTab(content, 0, content.length());
+ int end = Characters.skipSpaceTabBackwards(content, content.length() - 1, start);
+ inlineParser.parse(SourceLines.of(cell.substring(start, end + 1)), tableCell);
return tableCell;
}
- private static List split(CharSequence input) {
- String line = input.toString().trim();
- if (line.startsWith("|")) {
- line = line.substring(1);
+ private static List split(SourceLine line) {
+ CharSequence row = line.getContent();
+ int nonSpace = Characters.skipSpaceTab(row, 0, row.length());
+ int cellStart = nonSpace;
+ int cellEnd = row.length();
+ if (row.charAt(nonSpace) == '|') {
+ // This row has leading/trailing pipes - skip the leading pipe
+ cellStart = nonSpace + 1;
+ // Strip whitespace from the end but not the pipe or we could miss an empty ("||") cell
+ int nonSpaceEnd = Characters.skipSpaceTabBackwards(row, row.length() - 1, cellStart);
+ cellEnd = nonSpaceEnd + 1;
}
- List cells = new ArrayList<>();
+ List cells = new ArrayList<>();
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < line.length(); i++) {
- char c = line.charAt(i);
+ for (int i = cellStart; i < cellEnd; i++) {
+ char c = row.charAt(i);
switch (c) {
case '\\':
- if (i + 1 < line.length() && line.charAt(i + 1) == '|') {
+ if (i + 1 < cellEnd && row.charAt(i + 1) == '|') {
// Pipe is special for table parsing. An escaped pipe doesn't result in a new cell, but is
// passed down to inline parsing as an unescaped pipe. Note that that applies even for the `\|`
// in an input like `\\|` - in other words, table parsing doesn't support escaping backslashes.
@@ -123,15 +163,20 @@ private static List split(CharSequence input) {
}
break;
case '|':
- cells.add(sb.toString());
+ String content = sb.toString();
+
+ cells.add(SourceLine.of(content, line.substring(cellStart, i).getSourceSpan()));
sb.setLength(0);
+ // + 1 to skip the pipe itself for the next cell's span
+ cellStart = i + 1;
break;
default:
sb.append(c);
}
}
if (sb.length() > 0) {
- cells.add(sb.toString());
+ String content = sb.toString();
+ cells.add(SourceLine.of(content, line.substring(cellStart, line.getContent().length()).getSourceSpan()));
}
return cells;
}
@@ -144,11 +189,12 @@ private static List split(CharSequence input) {
// -|-
// |-|-|
// --- | ---
- private static List parseSeparator(CharSequence s) {
- List columns = new ArrayList<>();
+ private static List parseSeparator(CharSequence s) {
+ List columns = new ArrayList<>();
int pipes = 0;
boolean valid = false;
int i = 0;
+ int width = 0;
while (i < s.length()) {
char c = s.charAt(i);
switch (c) {
@@ -173,10 +219,12 @@ private static List parseSeparator(CharSequence s) {
if (c == ':') {
left = true;
i++;
+ width++;
}
boolean haveDash = false;
while (i < s.length() && s.charAt(i) == '-') {
i++;
+ width++;
haveDash = true;
}
if (!haveDash) {
@@ -186,8 +234,10 @@ private static List parseSeparator(CharSequence s) {
if (i < s.length() && s.charAt(i) == ':') {
right = true;
i++;
+ width++;
}
- columns.add(getAlignment(left, right));
+ columns.add(new TableCellInfo(getAlignment(left, right), width));
+ width = 0;
// Next, need another pipe
pipes = 0;
break;
@@ -223,17 +273,18 @@ public static class Factory extends AbstractBlockParserFactory {
@Override
public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
- CharSequence line = state.getLine();
- CharSequence paragraph = matchedBlockParser.getParagraphContent();
- if (paragraph != null && paragraph.toString().contains("|") && !paragraph.toString().contains("\n")) {
- CharSequence separatorLine = line.subSequence(state.getIndex(), line.length());
- List columns = parseSeparator(separatorLine);
+ List paragraphLines = matchedBlockParser.getParagraphLines().getLines();
+ if (paragraphLines.size() >= 1 && Characters.find('|', paragraphLines.get(paragraphLines.size() - 1).getContent(), 0) != -1) {
+ SourceLine line = state.getLine();
+ SourceLine separatorLine = line.substring(state.getIndex(), line.getContent().length());
+ List columns = parseSeparator(separatorLine.getContent());
if (columns != null && !columns.isEmpty()) {
- List headerCells = split(paragraph);
+ SourceLine paragraph = paragraphLines.get(paragraphLines.size() - 1);
+ List headerCells = split(paragraph);
if (columns.size() >= headerCells.size()) {
- return BlockStart.of(new TableBlockParser(columns, headerCells))
+ return BlockStart.of(new TableBlockParser(columns, paragraph))
.atIndex(state.getIndex())
- .replaceActiveBlockParser();
+ .replaceParagraphLines(1);
}
}
}
@@ -241,4 +292,21 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar
}
}
+ private static class TableCellInfo { // per-column data produced by parseSeparator; parseCell copies it onto each TableCell of that column
+ private final TableCell.Alignment alignment; // result of getAlignment(left, right) for the column's delimiter cell
+ private final int width; // number of ':' and '-' characters parseSeparator counted for the column's delimiter cell
+
+ public TableCell.Alignment getAlignment() {
+ return alignment;
+ }
+
+ public int getWidth() {
+ return width;
+ }
+
+ public TableCellInfo(TableCell.Alignment alignment, int width) { // plain value holder: both fields are set once here and never changed
+ this.alignment = alignment;
+ this.width = width;
+ }
+ }
}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java
index a1de50aac..966c4c151 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableHtmlNodeRenderer.java
@@ -5,7 +5,6 @@
import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.renderer.html.HtmlWriter;
-import java.util.Collections;
import java.util.Map;
public class TableHtmlNodeRenderer extends TableNodeRenderer {
@@ -18,6 +17,7 @@ public TableHtmlNodeRenderer(HtmlNodeRendererContext context) {
this.context = context;
}
+ @Override
protected void renderBlock(TableBlock tableBlock) {
htmlWriter.line();
htmlWriter.tag("table", getAttributes(tableBlock, "table"));
@@ -26,6 +26,7 @@ protected void renderBlock(TableBlock tableBlock) {
htmlWriter.line();
}
+ @Override
protected void renderHead(TableHead tableHead) {
htmlWriter.line();
htmlWriter.tag("thead", getAttributes(tableHead, "thead"));
@@ -34,6 +35,7 @@ protected void renderHead(TableHead tableHead) {
htmlWriter.line();
}
+ @Override
protected void renderBody(TableBody tableBody) {
htmlWriter.line();
htmlWriter.tag("tbody", getAttributes(tableBody, "tbody"));
@@ -42,6 +44,7 @@ protected void renderBody(TableBody tableBody) {
htmlWriter.line();
}
+ @Override
protected void renderRow(TableRow tableRow) {
htmlWriter.line();
htmlWriter.tag("tr", getAttributes(tableRow, "tr"));
@@ -50,6 +53,7 @@ protected void renderRow(TableRow tableRow) {
htmlWriter.line();
}
+ @Override
protected void renderCell(TableCell tableCell) {
String tagName = tableCell.isHeader() ? "th" : "td";
htmlWriter.line();
@@ -60,14 +64,14 @@ protected void renderCell(TableCell tableCell) {
}
private Map getAttributes(Node node, String tagName) {
- return context.extendAttributes(node, tagName, Collections.emptyMap());
+ return context.extendAttributes(node, tagName, Map.of());
}
private Map getCellAttributes(TableCell tableCell, String tagName) {
if (tableCell.getAlignment() != null) {
- return context.extendAttributes(tableCell, tagName, Collections.singletonMap("align", getAlignValue(tableCell.getAlignment())));
+ return context.extendAttributes(tableCell, tagName, Map.of("align", getAlignValue(tableCell.getAlignment())));
} else {
- return context.extendAttributes(tableCell, tagName, Collections.emptyMap());
+ return context.extendAttributes(tableCell, tagName, Map.of());
}
}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java
new file mode 100644
index 000000000..b0705f579
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java
@@ -0,0 +1,88 @@
+package org.commonmark.ext.gfm.tables.internal;
+
+import org.commonmark.ext.gfm.tables.*;
+import org.commonmark.node.Node;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+import org.commonmark.text.AsciiMatcher;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Renders GFM (GitHub Flavored Markdown) table nodes back to Markdown: header row, delimiter row with alignment markers, then body rows.
+ */
+public class TableMarkdownNodeRenderer extends TableNodeRenderer {
+ private final MarkdownWriter writer;
+ private final MarkdownNodeRendererContext context;
+
+ private final AsciiMatcher pipe = AsciiMatcher.builder().c('|').build(); // matches the pipe character; handed to pushRawEscape while cell content is written
+
+ private final List columns = new ArrayList<>(); // alignment of each header cell of the table currently being rendered, in column order
+
+ public TableMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+ this.writer = context.getWriter();
+ this.context = context;
+ }
+
+ @Override
+ protected void renderBlock(TableBlock node) {
+ columns.clear(); // forget the columns recorded for any previously rendered table
+ writer.pushTight(true); // rows are written in tight mode; restored by popTight below
+ renderChildren(node);
+ writer.popTight();
+ writer.block();
+ }
+
+ @Override
+ protected void renderHead(TableHead node) { // writes the header row(s), then the delimiter row
+ renderChildren(node); // rendering the header cells fills `columns` (see renderCell)
+ for (TableCell.Alignment columnAlignment : columns) { // one delimiter cell per recorded header column
+ writer.raw('|');
+ if (columnAlignment == TableCell.Alignment.LEFT) {
+ writer.raw(":---");
+ } else if (columnAlignment == TableCell.Alignment.RIGHT) {
+ writer.raw("---:");
+ } else if (columnAlignment == TableCell.Alignment.CENTER) {
+ writer.raw(":---:");
+ } else { // any other value, e.g. no alignment set on the cell
+ writer.raw("---");
+ }
+ }
+ writer.raw("|"); // closing pipe of the delimiter row
+ writer.block();
+ }
+
+ @Override
+ protected void renderBody(TableBody node) { // the body has no syntax of its own; only its rows are written
+ renderChildren(node);
+ }
+
+ @Override
+ protected void renderRow(TableRow node) { // used for both header and body rows
+ renderChildren(node); // each cell writes its own leading pipe
+ // Trailing | at the end of the line
+ writer.raw("|");
+ writer.block();
+ }
+
+ @Override
+ protected void renderCell(TableCell node) {
+ if (node.getParent() != null && node.getParent().getParent() instanceof TableHead) { // cell -> row -> head: only header cells define columns
+ columns.add(node.getAlignment());
+ }
+ writer.raw("|"); // leading pipe of this cell
+ writer.pushRawEscape(pipe); // NOTE(review): presumably makes the writer escape pipes inside the cell content; confirm against MarkdownWriter
+ renderChildren(node);
+ writer.popRawEscape();
+ }
+
+ private void renderChildren(Node parent) { // renders every direct child of parent, in order, through the context
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext(); // read the sibling link before the current node is rendered
+ context.render(node);
+ node = next;
+ }
+ }
+}
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java
index 93478a30b..2982e1518 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableNodeRenderer.java
@@ -1,28 +1,22 @@
package org.commonmark.ext.gfm.tables.internal;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.commonmark.ext.gfm.tables.TableBlock;
-import org.commonmark.ext.gfm.tables.TableBody;
-import org.commonmark.ext.gfm.tables.TableCell;
-import org.commonmark.ext.gfm.tables.TableHead;
-import org.commonmark.ext.gfm.tables.TableRow;
+import org.commonmark.ext.gfm.tables.*;
import org.commonmark.node.Node;
import org.commonmark.renderer.NodeRenderer;
+import java.util.Set;
+
abstract class TableNodeRenderer implements NodeRenderer {
@Override
public Set> getNodeTypes() {
- return new HashSet<>(Arrays.asList(
+ return Set.of(
TableBlock.class,
TableHead.class,
TableBody.class,
TableRow.class,
TableCell.class
- ));
+ );
}
@Override
diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java
index 94b0e8665..0ba6894b5 100644
--- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java
+++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableTextContentNodeRenderer.java
@@ -22,49 +22,46 @@ public TableTextContentNodeRenderer(TextContentNodeRendererContext context) {
this.context = context;
}
+ @Override
protected void renderBlock(TableBlock tableBlock) {
+ // Render rows tight
+ textContentWriter.pushTight(true);
renderChildren(tableBlock);
- if (tableBlock.getNext() != null) {
- textContentWriter.write("\n");
- }
+ textContentWriter.popTight();
+ textContentWriter.block();
}
+ @Override
protected void renderHead(TableHead tableHead) {
renderChildren(tableHead);
}
+ @Override
protected void renderBody(TableBody tableBody) {
renderChildren(tableBody);
}
+ @Override
protected void renderRow(TableRow tableRow) {
- textContentWriter.line();
renderChildren(tableRow);
- textContentWriter.line();
+ textContentWriter.block();
}
+ @Override
protected void renderCell(TableCell tableCell) {
renderChildren(tableCell);
- textContentWriter.write('|');
- textContentWriter.whitespace();
- }
-
- private void renderLastCell(TableCell tableCell) {
- renderChildren(tableCell);
+ // For the last cell in row, don't render the delimiter
+ if (tableCell.getNext() != null) {
+ textContentWriter.write('|');
+ textContentWriter.whitespace();
+ }
}
private void renderChildren(Node parent) {
Node node = parent.getFirstChild();
while (node != null) {
Node next = node.getNext();
-
- // For last cell in row, we dont render the delimiter.
- if (node instanceof TableCell && next == null) {
- renderLastCell((TableCell) node);
- } else {
- context.render(node);
- }
-
+ context.render(node);
node = next;
}
}
diff --git a/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java
new file mode 100644
index 000000000..85c11206c
--- /dev/null
+++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java
@@ -0,0 +1,75 @@
+package org.commonmark.ext.gfm.tables;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class TableMarkdownRendererTest { // round-trip tests: parse Markdown with the tables extension, render it with MarkdownRenderer, expect the input back
+
+ private static final Set EXTENSIONS = Set.of(TablesExtension.create());
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); // parser and renderer share the same extension set
+ private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void testHeadNoBody() {
+ assertRoundTrip("|Abc|\n|---|\n"); // one column
+ assertRoundTrip("|Abc|Def|\n|---|---|\n"); // two columns
+ assertRoundTrip("|Abc||\n|---|---|\n"); // second header cell is empty
+ }
+
+ @Test
+ public void testHeadAndBody() {
+ assertRoundTrip("|Abc|\n|---|\n|1|\n");
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|1|2|\n");
+ }
+
+ @Test
+ public void testBodyHasFewerColumns() {
+ // Could try not to write empty trailing cells but this is fine too
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|1||\n");
+ }
+
+ @Test
+ public void testAlignment() {
+ assertRoundTrip("|Abc|Def|\n|:---|---|\n|1|2|\n"); // left marker on the first column
+ assertRoundTrip("|Abc|Def|\n|---|---:|\n|1|2|\n"); // right marker on the second column
+ assertRoundTrip("|Abc|Def|\n|:---:|:---:|\n|1|2|\n"); // center markers on both columns
+ }
+
+ @Test
+ public void testInsideBlockQuote() {
+ assertRoundTrip("> |Abc|Def|\n> |---|---|\n> |1|2|\n"); // every table line carries the block quote prefix
+ }
+
+ @Test
+ public void testMultipleTables() {
+ assertRoundTrip("|Abc|Def|\n|---|---|\n\n|One|\n|---|\n|Only|\n"); // two tables separated by a blank line
+ }
+
+ @Test
+ public void testEscaping() {
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|text \\||\n"); // escaped pipe in plain cell text
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|code `\\|`|\n"); // escaped pipe inside a code span
+ assertRoundTrip("|Abc|Def|\n|---|---|\n|Inline HTML|Foo\\|bar|\n");
+ }
+
+ @Test
+ public void testEscaped() {
+ // `|` in Text nodes needs to be escaped, otherwise the generated Markdown does not get parsed back as a table
+ assertRoundTrip("\\|Abc\\|\n\\|---\\|\n");
+ }
+
+ protected String render(String source) { // parse with PARSER, then render the resulting document with RENDERER
+ return RENDERER.render(PARSER.parse(source));
+ }
+
+ private void assertRoundTrip(String input) { // passes only when rendering reproduces the input string exactly
+ String rendered = render(input);
+ assertThat(rendered).isEqualTo(input);
+ }
+}
diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java
index 12c806e32..e7f3db4d1 100644
--- a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java
+++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesSpecTest.java
@@ -7,39 +7,27 @@
import org.commonmark.testutil.TestResources;
import org.commonmark.testutil.example.Example;
import org.commonmark.testutil.example.ExampleReader;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.Parameter;
+import org.junit.jupiter.params.ParameterizedClass;
+import org.junit.jupiter.params.provider.MethodSource;
-import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
import java.util.Set;
-@RunWith(Parameterized.class)
+@ParameterizedClass
+@MethodSource("data")
public class TablesSpecTest extends RenderingTestCase {
- private static final Set EXTENSIONS = Collections.singleton(TablesExtension.create());
+ private static final Set EXTENSIONS = Set.of(TablesExtension.create());
private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
- private final Example example;
+ @Parameter
+ Example example;
- public TablesSpecTest(Example example) {
- this.example = example;
- }
-
- @Parameters(name = "{0}")
- public static List
\n");
+ }
+
+ @Test
+ public void sourceSpans() {
+ Parser parser = Parser.builder()
+ .extensions(EXTENSIONS)
+ .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES)
+ .build();
+
+ // This doesn't result in image attributes, so source spans should be for the single (merged) text node.
+ Node document = parser.parse("x{height=3 width=4}\n");
+ Paragraph block = (Paragraph) document.getFirstChild();
+ Node text = block.getFirstChild();
+ assertThat(text.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 0, 0, 19)));
+ }
+
+ @Override
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+}
diff --git a/commonmark-ext-ins/pom.xml b/commonmark-ext-ins/pom.xml
index 6f8c59318..48481c073 100644
--- a/commonmark-ext-ins/pom.xml
+++ b/commonmark-ext-ins/pom.xml
@@ -2,9 +2,9 @@
4.0.0
- com.atlassian.commonmark
+ org.commonmarkcommonmark-parent
- 0.14.1-SNAPSHOT
+ 0.28.1-SNAPSHOTcommonmark-ext-ins
@@ -13,31 +13,15 @@
- com.atlassian.commonmark
+ org.commonmarkcommonmark
- com.atlassian.commonmark
+ org.commonmarkcommonmark-test-utiltest
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- org.commonmark.ext.ins
-
-
-
-
-
-
-
diff --git a/commonmark-ext-ins/src/main/java/module-info.java b/commonmark-ext-ins/src/main/java/module-info.java
new file mode 100644
index 000000000..fb96ea598
--- /dev/null
+++ b/commonmark-ext-ins/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+module org.commonmark.ext.ins { // module descriptor for the ins (++text++) extension
+ exports org.commonmark.ext.ins; // the only exported package; org.commonmark.ext.ins.internal is not exported
+
+ requires transitive org.commonmark; // transitive: modules that require this one can read org.commonmark as well
+}
diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java
index 831cd75c8..e8a53e59a 100644
--- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java
+++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java
@@ -2,12 +2,22 @@
import org.commonmark.Extension;
import org.commonmark.ext.ins.internal.InsDelimiterProcessor;
-import org.commonmark.ext.ins.internal.InsNodeRenderer;
+import org.commonmark.ext.ins.internal.InsHtmlNodeRenderer;
+import org.commonmark.ext.ins.internal.InsMarkdownNodeRenderer;
+import org.commonmark.ext.ins.internal.InsTextContentNodeRenderer;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.NodeRenderer;
import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.renderer.html.HtmlNodeRendererFactory;
-import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
-import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.commonmark.renderer.text.TextContentNodeRendererContext;
+import org.commonmark.renderer.text.TextContentNodeRendererFactory;
+import org.commonmark.renderer.text.TextContentRenderer;
+
+import java.util.Set;
/**
* Extension for ins using ++
@@ -20,7 +30,7 @@
* The parsed ins text regions are turned into {@link Ins} nodes.
*
*/
-public class InsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension {
+public class InsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension {
private InsExtension() {
}
@@ -39,7 +49,34 @@ public void extend(HtmlRenderer.Builder rendererBuilder) {
rendererBuilder.nodeRendererFactory(new HtmlNodeRendererFactory() {
@Override
public NodeRenderer create(HtmlNodeRendererContext context) {
- return new InsNodeRenderer(context);
+ return new InsHtmlNodeRenderer(context);
+ }
+ });
+ }
+
+ @Override
+ public void extend(TextContentRenderer.Builder rendererBuilder) {
+ rendererBuilder.nodeRendererFactory(new TextContentNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(TextContentNodeRendererContext context) {
+ return new InsTextContentNodeRenderer(context);
+ }
+ });
+ }
+
+ @Override
+ public void extend(MarkdownRenderer.Builder rendererBuilder) {
+ rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() {
+ @Override
+ public NodeRenderer create(MarkdownNodeRendererContext context) {
+ return new InsMarkdownNodeRenderer(context);
+ }
+
+ @Override
+ public Set getSpecialCharacters() {
+ // We technically don't need to escape single occurrences of +, but that's all the extension API
+ // exposes currently.
+ return Set.of('+');
}
});
}
diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java
index 9a4ad383c..b0bfb4c6e 100644
--- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java
+++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsDelimiterProcessor.java
@@ -2,6 +2,8 @@
import org.commonmark.ext.ins.Ins;
import org.commonmark.node.Node;
+import org.commonmark.node.Nodes;
+import org.commonmark.node.SourceSpans;
import org.commonmark.node.Text;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import org.commonmark.parser.delimiter.DelimiterRun;
@@ -24,27 +26,31 @@ public int getMinLength() {
}
@Override
- public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) {
- if (opener.length() >= 2 && closer.length() >= 2) {
+ public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
+ if (openingRun.length() >= 2 && closingRun.length() >= 2) {
// Use exactly two delimiters even if we have more, and don't care about internal openers/closers.
+
+ Text opener = openingRun.getOpener();
+
+ // Wrap nodes between delimiters in ins.
+ Node ins = new Ins();
+
+ SourceSpans sourceSpans = new SourceSpans();
+ sourceSpans.addAllFrom(openingRun.getOpeners(2));
+
+ for (Node node : Nodes.between(opener, closingRun.getCloser())) {
+ ins.appendChild(node);
+ sourceSpans.addAll(node.getSourceSpans());
+ }
+
+ sourceSpans.addAllFrom(closingRun.getClosers(2));
+ ins.setSourceSpans(sourceSpans.getSourceSpans());
+
+ opener.insertAfter(ins);
+
return 2;
} else {
return 0;
}
}
-
- @Override
- public void process(Text opener, Text closer, int delimiterCount) {
- // Wrap nodes between delimiters in ins.
- Node ins = new Ins();
-
- Node tmp = opener.getNext();
- while (tmp != null && tmp != closer) {
- Node next = tmp.getNext();
- ins.appendChild(tmp);
- tmp = next;
- }
-
- opener.insertAfter(ins);
- }
}
diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsHtmlNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsHtmlNodeRenderer.java
new file mode 100644
index 000000000..dcd05fd59
--- /dev/null
+++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsHtmlNodeRenderer.java
@@ -0,0 +1,35 @@
+package org.commonmark.ext.ins.internal;
+
+import org.commonmark.node.Node;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlWriter;
+
+import java.util.Map;
+
+public class InsHtmlNodeRenderer extends InsNodeRenderer { // HTML rendering of ins nodes: the rendered children wrapped in an ins element
+
+ private final HtmlNodeRendererContext context;
+ private final HtmlWriter html; // the writer obtained from the context in the constructor
+
+ public InsHtmlNodeRenderer(HtmlNodeRendererContext context) {
+ this.context = context;
+ this.html = context.getWriter();
+ }
+
+ @Override
+ public void render(Node node) {
+ Map attributes = context.extendAttributes(node, "ins", Map.of()); // starts from an empty map; whatever the context returns is written on the opening tag
+ html.tag("ins", attributes); // opening tag
+ renderChildren(node);
+ html.tag("/ins"); // closing tag
+ }
+
+ private void renderChildren(Node parent) { // renders every direct child of parent, in order, through the context
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext(); // read the sibling link before the current node is rendered
+ context.render(node);
+ node = next;
+ }
+ }
+}
diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java
new file mode 100644
index 000000000..851d47282
--- /dev/null
+++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java
@@ -0,0 +1,32 @@
+package org.commonmark.ext.ins.internal;
+
+import org.commonmark.node.Node;
+import org.commonmark.renderer.markdown.MarkdownNodeRendererContext;
+import org.commonmark.renderer.markdown.MarkdownWriter;
+
+public class InsMarkdownNodeRenderer extends InsNodeRenderer { // Markdown rendering of ins nodes: the rendered children between two ++ markers
+
+ private final MarkdownNodeRendererContext context;
+ private final MarkdownWriter writer; // the writer obtained from the context in the constructor
+
+ public InsMarkdownNodeRenderer(MarkdownNodeRendererContext context) {
+ this.context = context;
+ this.writer = context.getWriter();
+ }
+
+ @Override
+ public void render(Node node) {
+ writer.raw("++"); // opening delimiter, written raw
+ renderChildren(node);
+ writer.raw("++"); // closing delimiter, same two characters
+ }
+
+ private void renderChildren(Node parent) { // renders every direct child of parent, in order, through the context
+ Node node = parent.getFirstChild();
+ while (node != null) {
+ Node next = node.getNext(); // read the sibling link before the current node is rendered
+ context.render(node);
+ node = next;
+ }
+ }
+}
diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java
index faf15cae7..31f0a64ec 100644
--- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java
+++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsNodeRenderer.java
@@ -1,44 +1,15 @@
package org.commonmark.ext.ins.internal;
import org.commonmark.ext.ins.Ins;
-import org.commonmark.renderer.html.HtmlWriter;
-import org.commonmark.renderer.html.HtmlNodeRendererContext;
import org.commonmark.node.Node;
import org.commonmark.renderer.NodeRenderer;
-import java.util.Collections;
-import java.util.Map;
import java.util.Set;
-public class InsNodeRenderer implements NodeRenderer {
-
- private final HtmlNodeRendererContext context;
- private final HtmlWriter html;
-
- public InsNodeRenderer(HtmlNodeRendererContext context) {
- this.context = context;
- this.html = context.getWriter();
- }
+abstract class InsNodeRenderer implements NodeRenderer {
@Override
public Set> getNodeTypes() {
- return Collections.>singleton(Ins.class);
- }
-
- @Override
- public void render(Node node) {
- Map attributes = context.extendAttributes(node, "ins", Collections.emptyMap());
- html.tag("ins", attributes);
- renderChildren(node);
- html.tag("/ins");
- }
-
- private void renderChildren(Node parent) {
- Node node = parent.getFirstChild();
- while (node != null) {
- Node next = node.getNext();
- context.render(node);
- node = next;
- }
+ return Set.of(Ins.class);
}
}
diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsTextContentNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsTextContentNodeRenderer.java
new file mode 100644
index 000000000..f30947c93
--- /dev/null
+++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsTextContentNodeRenderer.java
@@ -0,0 +1,27 @@
+package org.commonmark.ext.ins.internal;
+
+import org.commonmark.node.Node;
+import org.commonmark.renderer.text.TextContentNodeRendererContext;
+
+/**
+ * Text content renderer for the node types declared by {@code InsNodeRenderer}: writes only the node's
+ * children, without any delimiters.
+ */
+public class InsTextContentNodeRenderer extends InsNodeRenderer {
+
+    private final TextContentNodeRendererContext context;
+
+    public InsTextContentNodeRenderer(TextContentNodeRendererContext context) {
+        this.context = context;
+    }
+
+    /**
+     * Renders the children of {@code node}; the node itself contributes no output.
+     */
+    @Override
+    public void render(Node node) {
+        renderChildren(node);
+    }
+
+    /**
+     * Hands each direct child of {@code parent} to the renderer context, in document order.
+     */
+    private void renderChildren(Node parent) {
+        // The successor is read before rendering, so the walk continues from the node that followed
+        // the child at the time it was visited.
+        for (Node child = parent.getFirstChild(), following; child != null; child = following) {
+            following = child.getNext();
+            context.render(child);
+        }
+    }
+}
diff --git a/commonmark-ext-ins/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-ins/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-ins/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java
new file mode 100644
index 000000000..6fc9ead67
--- /dev/null
+++ b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java
@@ -0,0 +1,33 @@
+package org.commonmark.ext.ins;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.markdown.MarkdownRenderer;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Round-trip tests for the ins extension with the {@link MarkdownRenderer}: the input is parsed, rendered
+ * back to Markdown and compared with the original text.
+ */
+public class InsMarkdownRendererTest {
+
+    private static final Set<Extension> EXTENSIONS = Set.of(InsExtension.create());
+    private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+    private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build();
+
+    @Test
+    public void testIns() {
+        // Delimited text must come back with its ++ delimiters.
+        assertRoundTrip("++foo++\n");
+
+        // Backslash-escaped plus signs must come back still escaped.
+        assertRoundTrip("\\+\\+foo\\+\\+\n");
+    }
+
+    /**
+     * Parses {@code source} with the ins extension enabled and renders the result as Markdown.
+     */
+    protected String render(String source) {
+        return RENDERER.render(PARSER.parse(source));
+    }
+
+    /**
+     * Asserts that rendering {@code input} reproduces it exactly.
+     */
+    private void assertRoundTrip(String input) {
+        String rendered = render(input);
+        assertThat(rendered).isEqualTo(input);
+    }
+}
diff --git a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java
index 2b97431c3..a5c91a395 100644
--- a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java
+++ b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsTest.java
@@ -2,21 +2,27 @@
import org.commonmark.Extension;
import org.commonmark.node.Node;
+import org.commonmark.node.Paragraph;
+import org.commonmark.node.SourceSpan;
+import org.commonmark.parser.IncludeSourceSpans;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.renderer.text.TextContentRenderer;
import org.commonmark.testutil.RenderingTestCase;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.util.Collections;
+import java.util.List;
import java.util.Set;
-import static org.junit.Assert.assertEquals;
+import static org.assertj.core.api.Assertions.assertThat;
public class InsTest extends RenderingTestCase {
- private static final Set EXTENSIONS = Collections.singleton(InsExtension.create());
+ private static final Set EXTENSIONS = Set.of(InsExtension.create());
private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
+ private static final TextContentRenderer CONTENT_RENDERER = TextContentRenderer.builder()
+ .extensions(EXTENSIONS).build();
@Test
public void onePlusIsNotEnough() {
@@ -76,8 +82,27 @@ public void insideBlockQuote() {
public void delimited() {
Node document = PARSER.parse("++foo++");
Ins ins = (Ins) document.getFirstChild().getFirstChild();
- assertEquals("++", ins.getOpeningDelimiter());
- assertEquals("++", ins.getClosingDelimiter());
+ assertThat(ins.getOpeningDelimiter()).isEqualTo("++");
+ assertThat(ins.getClosingDelimiter()).isEqualTo("++");
+ }
+
+ @Test
+ public void textContentRenderer() {
+ Node document = PARSER.parse("++foo++");
+ assertThat(CONTENT_RENDERER.render(document)).isEqualTo("foo");
+ }
+
+ @Test
+ public void sourceSpans() {
+ Parser parser = Parser.builder()
+ .extensions(EXTENSIONS)
+ .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES)
+ .build();
+
+ Node document = parser.parse("hey ++there++\n");
+ Paragraph block = (Paragraph) document.getFirstChild();
+ Node ins = block.getLastChild();
+ assertThat(ins.getSourceSpans()).isEqualTo(List.of(SourceSpan.of(0, 4, 4, 9)));
}
@Override
diff --git a/commonmark-ext-task-list-items/pom.xml b/commonmark-ext-task-list-items/pom.xml
new file mode 100644
index 000000000..4359f8707
--- /dev/null
+++ b/commonmark-ext-task-list-items/pom.xml
@@ -0,0 +1,27 @@
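+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.commonmark</groupId>
+        <artifactId>commonmark-parent</artifactId>
+        <version>0.28.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>commonmark-ext-task-list-items</artifactId>
+    <name>commonmark-java extension for task list items</name>
+    <description>commonmark-java extension for task list items</description>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.commonmark</groupId>
+            <artifactId>commonmark</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.commonmark</groupId>
+            <artifactId>commonmark-test-util</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>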
diff --git a/commonmark-ext-task-list-items/src/main/java/module-info.java b/commonmark-ext-task-list-items/src/main/java/module-info.java
new file mode 100644
index 000000000..9528323ea
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+// Module descriptor for the task list items extension.
+module org.commonmark.ext.task.list.items {
+ // Only this package is exported by the descriptor.
+ exports org.commonmark.ext.task.list.items;
+
+ // "transitive": modules that require this one can also read org.commonmark.
+ requires transitive org.commonmark;
+}
diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemMarker.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemMarker.java
new file mode 100644
index 000000000..9eca59bc9
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemMarker.java
@@ -0,0 +1,19 @@
+package org.commonmark.ext.task.list.items;
+
+import org.commonmark.node.CustomNode;
+
+/**
+ * A marker node indicating that a list item contains a task. It records whether the task is checked.
+ */
+public class TaskListItemMarker extends CustomNode {
+
+ private final boolean checked;
+
+ /**
+ * @param checked whether the task is checked
+ */
+ public TaskListItemMarker(boolean checked) {
+ this.checked = checked;
+ }
+
+ /**
+ * @return the checked state this marker was created with
+ */
+ public boolean isChecked() {
+ return checked;
+ }
+}
diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemsExtension.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemsExtension.java
new file mode 100644
index 000000000..9bf0a2155
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/TaskListItemsExtension.java
@@ -0,0 +1,45 @@
+package org.commonmark.ext.task.list.items;
+
+import org.commonmark.Extension;
+import org.commonmark.ext.task.list.items.internal.TaskListItemHtmlNodeRenderer;
+import org.commonmark.ext.task.list.items.internal.TaskListItemPostProcessor;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlNodeRendererFactory;
+import org.commonmark.renderer.html.HtmlRenderer;
+
+/**
+ * Extension for adding task list items.
+ *
+ * Create it with {@link #create()} and then configure it on the builders
+ * ({@link org.commonmark.parser.Parser.Builder#extensions(Iterable)},
+ * {@link HtmlRenderer.Builder#extensions(Iterable)}).
+ *
+ *
+ * @since 0.15.0
+ */
+public class TaskListItemsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension {
+
+    // Not instantiable from outside; use create().
+    private TaskListItemsExtension() {
+    }
+
+    /**
+     * @return a new instance of this extension
+     */
+    public static Extension create() {
+        return new TaskListItemsExtension();
+    }
+
+    /**
+     * Registers a {@link TaskListItemPostProcessor} on the parser builder.
+     */
+    @Override
+    public void extend(Parser.Builder parserBuilder) {
+        TaskListItemPostProcessor postProcessor = new TaskListItemPostProcessor();
+        parserBuilder.postProcessor(postProcessor);
+    }
+
+    /**
+     * Registers a factory on the HTML renderer builder that creates a {@link TaskListItemHtmlNodeRenderer}
+     * for the given renderer context.
+     */
+    @Override
+    public void extend(HtmlRenderer.Builder rendererBuilder) {
+        HtmlNodeRendererFactory factory = TaskListItemHtmlNodeRenderer::new;
+        rendererBuilder.nodeRendererFactory(factory);
+    }
+}
diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemHtmlNodeRenderer.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemHtmlNodeRenderer.java
new file mode 100644
index 000000000..331b301e9
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemHtmlNodeRenderer.java
@@ -0,0 +1,52 @@
+package org.commonmark.ext.task.list.items.internal;
+
+import org.commonmark.ext.task.list.items.TaskListItemMarker;
+import org.commonmark.node.Node;
+import org.commonmark.renderer.NodeRenderer;
+import org.commonmark.renderer.html.HtmlNodeRendererContext;
+import org.commonmark.renderer.html.HtmlWriter;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * HTML renderer for {@link TaskListItemMarker} nodes: writes a disabled checkbox {@code input} tag followed
+ * by a space.
+ */
+public class TaskListItemHtmlNodeRenderer implements NodeRenderer {
+
+    private final HtmlNodeRendererContext context;
+    private final HtmlWriter html;
+
+    public TaskListItemHtmlNodeRenderer(HtmlNodeRendererContext context) {
+        this.context = context;
+        this.html = context.getWriter();
+    }
+
+    /**
+     * @return the single node type handled by this renderer, {@link TaskListItemMarker}
+     */
+    @Override
+    public Set<Class<? extends Node>> getNodeTypes() {
+        return Set.of(TaskListItemMarker.class);
+    }
+
+    /**
+     * Writes the checkbox for a {@link TaskListItemMarker}; nodes of any other type produce no output.
+     */
+    @Override
+    public void render(Node node) {
+        if (node instanceof TaskListItemMarker) {
+            // LinkedHashMap keeps insertion order: type, disabled, then (if set) checked.
+            Map<String, String> attributes = new LinkedHashMap<>();
+            attributes.put("type", "checkbox");
+            attributes.put("disabled", "");
+            if (((TaskListItemMarker) node).isChecked()) {
+                attributes.put("checked", "");
+            }
+            html.tag("input", context.extendAttributes(node, "input", attributes));
+            // Add a space after the input tag (as the next text node has been trimmed)
+            html.text(" ");
+            renderChildren(node);
+        }
+    }
+
+    /**
+     * Hands each direct child of {@code parent} to the renderer context, in document order.
+     */
+    private void renderChildren(Node parent) {
+        Node node = parent.getFirstChild();
+        while (node != null) {
+            // Read the successor first so the walk continues from the node that followed this child.
+            Node next = node.getNext();
+            context.render(node);
+            node = next;
+        }
+    }
+}
diff --git a/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemPostProcessor.java b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemPostProcessor.java
new file mode 100644
index 000000000..b95c2e30d
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/main/java/org/commonmark/ext/task/list/items/internal/TaskListItemPostProcessor.java
@@ -0,0 +1,49 @@
+package org.commonmark.ext.task.list.items.internal;
+
+import org.commonmark.ext.task.list.items.TaskListItemMarker;
+import org.commonmark.node.*;
+import org.commonmark.parser.PostProcessor;
+
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Post-processor that looks for list items whose first paragraph starts with a task marker
+ * ({@code [ ]}, {@code [x]} or {@code [X]} followed by whitespace) and prepends a
+ * {@link TaskListItemMarker} to them.
+ */
+public class TaskListItemPostProcessor implements PostProcessor {
+
+    private static final Pattern REGEX_TASK_LIST_ITEM = Pattern.compile("^\\[([xX\\s])]\\s+(.*)");
+
+    /**
+     * Visits {@code node} and everything below it, converting matching list items in place.
+     *
+     * @return the same {@code node} that was passed in
+     */
+    @Override
+    public Node process(Node node) {
+        node.accept(new TaskListItemVisitor());
+        return node;
+    }
+
+    private static class TaskListItemVisitor extends AbstractVisitor {
+
+        @Override
+        public void visit(ListItem listItem) {
+            markIfTask(listItem);
+            // Children are visited whether or not this item was a task.
+            visitChildren(listItem);
+        }
+
+        /**
+         * Prepends a {@link TaskListItemMarker} to {@code listItem} and strips the marker text when the
+         * item's first child is a paragraph whose first child is a text node matching the task pattern.
+         */
+        private static void markIfTask(ListItem listItem) {
+            Node firstBlock = listItem.getFirstChild();
+            if (!(firstBlock instanceof Paragraph)) {
+                return;
+            }
+            Node firstInline = firstBlock.getFirstChild();
+            if (!(firstInline instanceof Text)) {
+                return;
+            }
+            Text text = (Text) firstInline;
+            Matcher taskMatch = REGEX_TASK_LIST_ITEM.matcher(text.getLiteral());
+            if (!taskMatch.matches()) {
+                return;
+            }
+
+            // Group 1 is the single character between the brackets: x, X or whitespace.
+            String box = taskMatch.group(1);
+            boolean ticked = "X".equals(box) || "x".equals(box);
+
+            // The marker becomes the first child of the list item.
+            listItem.prependChild(new TaskListItemMarker(ticked));
+
+            // Keep only the text after the task marker (group 2). The whitespace between marker and text
+            // is consumed by the pattern, so a renderer has to add a space after the marker itself.
+            text.setLiteral(taskMatch.group(2));
+        }
+    }
+}
diff --git a/commonmark-ext-task-list-items/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-task-list-items/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-task-list-items/src/test/java/org/commonmark/ext/task/list/items/TaskListItemsTest.java b/commonmark-ext-task-list-items/src/test/java/org/commonmark/ext/task/list/items/TaskListItemsTest.java
new file mode 100644
index 000000000..0adc615a7
--- /dev/null
+++ b/commonmark-ext-task-list-items/src/test/java/org/commonmark/ext/task/list/items/TaskListItemsTest.java
@@ -0,0 +1,102 @@
+package org.commonmark.ext.task.list.items;
+
+import org.commonmark.Extension;
+import org.commonmark.parser.Parser;
+import org.commonmark.renderer.html.HtmlRenderer;
+import org.commonmark.testutil.RenderingTestCase;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+public class TaskListItemsTest extends RenderingTestCase {
+
+ private static final Set EXTENSIONS = Set.of(TaskListItemsExtension.create());
+ private static final String HTML_CHECKED = "";
+ private static final String HTML_UNCHECKED = "";
+ private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
+ private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
+
+ @Test
+ public void baseCase() {
+ assertRendering("- [x] this is *done*\n", "
\n");
+ assertRendering("- [O] is not a _task_\n", "
\n
[O] is not a task
\n
\n");
+ assertRendering("* [] neither is this\n", "
\n
[] neither is this
\n
\n");
+ assertRendering("* [ ] nor this\n" +
+ "* [XX] nor this\n",
+ "
\n
[ ] nor this
\n
[XX] nor this
\n
\n");
+ assertRendering("+ [x]] is not a task\n", "
\n
[x]] is not a task
\n
\n");
+ assertRendering("- [x isn't\n", "
\n
[x isn't
\n
\n");
+ assertRendering("- [[x is not\n", "
\n
[[x is not
\n
\n");
+ assertRendering("- x] nope\n", "
\n
x] nope
\n
\n");
+ assertRendering("- x]] no way\n", "
\n
x]] no way
\n
\n");
+ assertRendering("+ (x) sorry no\n", "
\n
(x) sorry no
\n
\n");
+ assertRendering("+ {x} sorry not sorry\n", "
\n
{x} sorry not sorry
\n
\n");
+ assertRendering("+ [[x]] nooo\n", "
\n
[[x]] nooo
\n
\n");
+ assertRendering("+ text before [x] is not a task\n", "
\n
text before [x] is not a task
\n
\n");
+ assertRendering("* [x] \n* [ ] \n", "
\n
[x]
\n
[ ]
\n
\n");
+ }
+
+ @Override
+ protected String render(String source) {
+ return RENDERER.render(PARSER.parse(source));
+ }
+}
diff --git a/commonmark-ext-yaml-front-matter/pom.xml b/commonmark-ext-yaml-front-matter/pom.xml
index d4534ec55..e6822f771 100644
--- a/commonmark-ext-yaml-front-matter/pom.xml
+++ b/commonmark-ext-yaml-front-matter/pom.xml
@@ -3,8 +3,8 @@
4.0.0commonmark-parent
- com.atlassian.commonmark
- 0.14.1-SNAPSHOT
+ org.commonmark
+ 0.28.1-SNAPSHOTcommonmark-ext-yaml-front-matter
@@ -13,31 +13,15 @@
- com.atlassian.commonmark
+ org.commonmarkcommonmark
- com.atlassian.commonmark
+ org.commonmarkcommonmark-test-utiltest
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- org.commonmark.ext.front.matter
-
-
-
-
-
-
-
diff --git a/commonmark-ext-yaml-front-matter/src/main/java/module-info.java b/commonmark-ext-yaml-front-matter/src/main/java/module-info.java
new file mode 100644
index 000000000..5f96c14ad
--- /dev/null
+++ b/commonmark-ext-yaml-front-matter/src/main/java/module-info.java
@@ -0,0 +1,5 @@
+// Module descriptor for the YAML front matter extension.
+module org.commonmark.ext.front.matter {
+ // Only this package is exported by the descriptor.
+ exports org.commonmark.ext.front.matter;
+
+ // "transitive": modules that require this one can also read org.commonmark.
+ requires transitive org.commonmark;
+}
diff --git a/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java b/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java
index 5612d9ffd..469cf4e2f 100644
--- a/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java
+++ b/commonmark-ext-yaml-front-matter/src/main/java/org/commonmark/ext/front/matter/internal/YamlFrontMatterBlockParser.java
@@ -2,9 +2,10 @@
import org.commonmark.ext.front.matter.YamlFrontMatterBlock;
import org.commonmark.ext.front.matter.YamlFrontMatterNode;
-import org.commonmark.internal.DocumentBlockParser;
import org.commonmark.node.Block;
+import org.commonmark.node.Document;
import org.commonmark.parser.InlineParser;
+import org.commonmark.parser.SourceLine;
import org.commonmark.parser.block.*;
import java.util.ArrayList;
@@ -13,7 +14,7 @@
import java.util.regex.Pattern;
public class YamlFrontMatterBlockParser extends AbstractBlockParser {
- private static final Pattern REGEX_METADATA = Pattern.compile("^[ ]{0,3}([A-Za-z0-9_-]+):\\s*(.*)");
+ private static final Pattern REGEX_METADATA = Pattern.compile("^[ ]{0,3}([A-Za-z0-9._-]+):\\s*(.*)");
private static final Pattern REGEX_METADATA_LIST = Pattern.compile("^[ ]+-\\s*(.*)");
private static final Pattern REGEX_METADATA_LITERAL = Pattern.compile("^\\s*(.*)");
private static final Pattern REGEX_BEGIN = Pattern.compile("^-{3}(\\s.*)?");
@@ -37,12 +38,12 @@ public Block getBlock() {
}
@Override
- public void addLine(CharSequence line) {
+ public void addLine(SourceLine line) {
}
@Override
public BlockContinue tryContinue(ParserState parserState) {
- final CharSequence line = parserState.getLine();
+ final CharSequence line = parserState.getLine().getContent();
if (REGEX_END.matcher(line).matches()) {
if (currentKey != null) {
@@ -60,10 +61,11 @@ public BlockContinue tryContinue(ParserState parserState) {
inLiteral = false;
currentKey = matcher.group(1);
currentValues = new ArrayList<>();
- if ("|".equals(matcher.group(2))) {
+ String value = matcher.group(2);
+ if ("|".equals(value)) {
inLiteral = true;
- } else if (!"".equals(matcher.group(2))) {
- currentValues.add(matcher.group(2));
+ } else if (!"".equals(value)) {
+ currentValues.add(parseString(value));
}
return BlockContinue.atIndex(parserState.getIndex());
@@ -80,7 +82,8 @@ public BlockContinue tryContinue(ParserState parserState) {
} else {
matcher = REGEX_METADATA_LIST.matcher(line);
if (matcher.matches()) {
- currentValues.add(matcher.group(1));
+ String value = matcher.group(1);
+ currentValues.add(parseString(value));
}
}
@@ -92,13 +95,31 @@ public BlockContinue tryContinue(ParserState parserState) {
public void parseInlines(InlineParser inlineParser) {
}
+ /**
+  * Removes the surrounding quotes from a single- or double-quoted value and resolves the supported
+  * escapes; any other value is returned unchanged.
+  *
+  * @param s the raw value text
+  * @return the unquoted value, or {@code s} itself if it is not enclosed in a matching pair of quotes
+  */
+ private static String parseString(String s) {
+     // Limited parsing of https://yaml.org/spec/1.2.2/#73-flow-scalar-styles
+     // We assume input is well-formed and otherwise treat it as a plain string. In a real
+     // parser, e.g. `'foo` would be invalid because it's missing a trailing `'`.
+     if (s.length() < 2) {
+         // A lone `'` or `"` both starts and ends with the quote character; without this check
+         // substring(1, 0) below would throw StringIndexOutOfBoundsException.
+         return s;
+     }
+     if (s.startsWith("'") && s.endsWith("'")) {
+         String inner = s.substring(1, s.length() - 1);
+         // In single-quoted style a doubled quote stands for one literal quote.
+         return inner.replace("''", "'");
+     } else if (s.startsWith("\"") && s.endsWith("\"")) {
+         String inner = s.substring(1, s.length() - 1);
+         // Only support escaped `\` and `"`, nothing else.
+         return inner
+                 .replace("\\\"", "\"")
+                 .replace("\\\\", "\\");
+     } else {
+         return s;
+     }
+ }
+
public static class Factory extends AbstractBlockParserFactory {
@Override
public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
- CharSequence line = state.getLine();
+ CharSequence line = state.getLine().getContent();
BlockParser parentParser = matchedBlockParser.getMatchedBlockParser();
// check whether this line is the first line of whole document or not
- if (parentParser instanceof DocumentBlockParser && parentParser.getBlock().getFirstChild() == null &&
+ if (parentParser.getBlock() instanceof Document && parentParser.getBlock().getFirstChild() == null &&
REGEX_BEGIN.matcher(line).matches()) {
return BlockStart.of(new YamlFrontMatterBlockParser()).atIndex(state.getNextNonSpaceIndex());
}
diff --git a/commonmark-ext-yaml-front-matter/src/main/resources/META-INF/LICENSE.txt b/commonmark-ext-yaml-front-matter/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 000000000..b09e367ce
--- /dev/null
+++ b/commonmark-ext-yaml-front-matter/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2015, Atlassian Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java b/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java
index 505c70e6a..db17d4a4e 100644
--- a/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java
+++ b/commonmark-ext-yaml-front-matter/src/test/java/org/commonmark/ext/front/matter/YamlFrontMatterTest.java
@@ -6,18 +6,16 @@
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.commonmark.testutil.RenderingTestCase;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.assertj.core.api.Assertions.assertThat;
public class YamlFrontMatterTest extends RenderingTestCase {
- private static final Set EXTENSIONS = Collections.singleton(YamlFrontMatterExtension.create());
+ private static final Set EXTENSIONS = Set.of(YamlFrontMatterExtension.create());
private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build();
private static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).build();
@@ -30,16 +28,12 @@ public void simpleValue() {
"\ngreat";
final String rendered = "
+````````````````````````````````
+
diff --git a/commonmark-test-util/src/main/resources/commonmark.js-regression.txt b/commonmark-test-util/src/main/resources/commonmark.js-regression.txt
index ec5143eff..16a0e8c35 100644
--- a/commonmark-test-util/src/main/resources/commonmark.js-regression.txt
+++ b/commonmark-test-util/src/main/resources/commonmark.js-regression.txt
@@ -80,7 +80,7 @@ Issue jgm/CommonMark#468 - backslash at end of link definition
[]: test
````````````````````````````````
-Issue jgm/commonmark.js#121 - punctuation set different
+Issue commonmark/commonmark.js#121 - punctuation set different
```````````````````````````````` example
^_test_
@@ -122,7 +122,15 @@ Double-encoding.
```````````````````````````````` example
[XSS](javascript:alert%28'XSS'%29)
.
-
+````````````````````````````````
diff --git a/commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt b/commonmark-test-util/src/main/resources/gfm-spec.txt
similarity index 99%
rename from commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt
rename to commonmark-test-util/src/main/resources/gfm-spec.txt
index 582131d70..d42f3369e 100644
--- a/commonmark-ext-gfm-tables/src/test/resources/gfm-spec.txt
+++ b/commonmark-test-util/src/main/resources/gfm-spec.txt
@@ -130,7 +130,7 @@ questions it does not answer:
not require that. This is hardly a "corner case," and divergences
between implementations on this issue often lead to surprises for
users in real documents. (See [this comment by John
- Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+ Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).)
2. Is a blank line needed before a block quote or heading?
Most implementations do not require the blank line. However,
@@ -138,7 +138,7 @@ questions it does not answer:
also to ambiguities in parsing (note that some implementations
put the heading inside the blockquote, while others do not).
(John Gruber has also spoken [in favor of requiring the blank
- lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+ lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).)
3. Is a blank line needed before an indented code block?
(`Markdown.pl` requires it, but this is not mentioned in the
@@ -171,7 +171,7 @@ questions it does not answer:
```
(There are some relevant comments by John Gruber
- [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+ [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).)
5. Can list markers be indented? Can ordered list markers be right-aligned?
@@ -1001,10 +1001,7 @@ interpretable as a [code fence], [ATX heading][ATX headings],
A [setext heading underline](@) is a sequence of
`=` characters or a sequence of `-` characters, with no more than 3
-spaces indentation and any number of trailing spaces. If a line
-containing a single `-` can be interpreted as an
-empty [list items], it should be interpreted this way
-and not as a [setext heading underline].
+spaces of indentation and any number of trailing spaces or tabs.
The heading is a level 1 heading if `=` characters are used in
the [setext heading underline], and a level 2 heading if `-`
@@ -1638,7 +1635,7 @@ has been found, the code block contains all of the lines after the
opening code fence until the end of the containing block (or
document). (An alternative spec would require backtracking in the
event that a closing code fence is not found. But this makes parsing
-much less efficient, and there seems to be no real down side to the
+much less efficient, and there seems to be no real downside to the
behavior described here.)
A fenced code block may interrupt a paragraph, and does not require
@@ -2068,7 +2065,7 @@ followed by an uppercase ASCII letter.\
``.
-6. **Start condition:** line begins the string `<` or `</`
+6. **Start condition:** line begins with the string `<` or `</`
followed by one of the strings (case-insensitive) `address`,
`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
@@ -2077,7 +2074,7 @@ followed by one of the strings (case-insensitive) `address`,
`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
-`section`, `source`, `summary`, `table`, `tbody`, `td`,
+`section`, `summary`, `table`, `tbody`, `td`,
`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
by [whitespace], the end of the line, the string `>`, or
the string `/>`.\
@@ -5279,7 +5276,7 @@ well. ([reStructuredText](http://docutils.sourceforge.net/rst.html)
takes a different approach, requiring blank lines before lists
even inside other list items.)
-In order to solve of unwanted lists in paragraphs with
+In order to solve the problem of unwanted lists in paragraphs with
hard-wrapped numerals, we allow only lists starting with `1` to
interrupt paragraphs. Thus,
@@ -6929,7 +6926,7 @@ foo__bar__
```````````````````````````````` example
__foo, __bar__, baz__
.
-
````````````````````````````````
@@ -9410,10 +9407,9 @@ character, and a `>` character.
A [closing tag](@) consists of the string `</`, a
[tag name], optional [whitespace], and the character `>`.
-An [HTML comment](@) consists of ``,
-where *text* does not start with `>` or `->`, does not end with `-`,
-and does not contain `--`. (See the
-[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).)
+An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of characters not including the string `-->`, and `-->` (see the
+[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)).
A [processing instruction](@)
consists of the string `<?`, a string
@@ -9554,30 +9550,20 @@ Illegal attributes in closing tag:
Comments:
```````````````````````````````` example
-foo
-.
-
foo
-````````````````````````````````
-
-
-```````````````````````````````` example
-foo
+foo
.
-
````````````````````````````````
@@ -10224,4 +10210,3 @@ closers:
After we're done, we remove all delimiters above `stack_bottom` from the
delimiter stack.
-
diff --git a/commonmark-test-util/src/main/resources/spec.txt b/commonmark-test-util/src/main/resources/spec.txt
index 3913de442..f1fab281e 100644
--- a/commonmark-test-util/src/main/resources/spec.txt
+++ b/commonmark-test-util/src/main/resources/spec.txt
@@ -1,9 +1,9 @@
---
title: CommonMark Spec
author: John MacFarlane
-version: 0.29
-date: '2019-04-06'
-license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
+version: '0.31.2'
+date: '2024-01-28'
+license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)'
...
# Introduction
@@ -14,7 +14,7 @@ Markdown is a plain text format for writing structured documents,
based on conventions for indicating formatting in email
and usenet posts. It was developed by John Gruber (with
help from Aaron Swartz) and released in 2004 in the form of a
-[syntax description](http://daringfireball.net/projects/markdown/syntax)
+[syntax description](https://daringfireball.net/projects/markdown/syntax)
and a Perl script (`Markdown.pl`) for converting Markdown to
HTML. In the next decade, dozens of implementations were
developed in many languages. Some extended the original
@@ -34,10 +34,10 @@ As Gruber writes:
> Markdown-formatted document should be publishable as-is, as
> plain text, without looking like it's been marked up with tags
> or formatting instructions.
-> ()
+> ()
The point can be illustrated by comparing a sample of
-[AsciiDoc](http://www.methods.co.nz/asciidoc/) with
+[AsciiDoc](https://asciidoc.org/) with
an equivalent sample of Markdown. Here is a sample of
AsciiDoc from the AsciiDoc manual:
@@ -103,7 +103,7 @@ source, not just in the processed document.
## Why is a spec needed?
John Gruber's [canonical description of Markdown's
-syntax](http://daringfireball.net/projects/markdown/syntax)
+syntax](https://daringfireball.net/projects/markdown/syntax)
does not specify the syntax unambiguously. Here are some examples of
questions it does not answer:
@@ -114,7 +114,7 @@ questions it does not answer:
not require that. This is hardly a "corner case," and divergences
between implementations on this issue often lead to surprises for
users in real documents. (See [this comment by John
- Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+ Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).)
2. Is a blank line needed before a block quote or heading?
Most implementations do not require the blank line. However,
@@ -122,7 +122,7 @@ questions it does not answer:
also to ambiguities in parsing (note that some implementations
put the heading inside the blockquote, while others do not).
(John Gruber has also spoken [in favor of requiring the blank
- lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+ lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).)
3. Is a blank line needed before an indented code block?
(`Markdown.pl` requires it, but this is not mentioned in the
@@ -155,7 +155,7 @@ questions it does not answer:
```
(There are some relevant comments by John Gruber
- [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+ [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).)
5. Can list markers be indented? Can ordered list markers be right-aligned?
@@ -270,6 +270,16 @@ of representing the structural distinctions we need to make, and the
choice of HTML for the tests makes it possible to run the tests against
an implementation without writing an abstract syntax tree renderer.
+Note that not every feature of the HTML samples is mandated by
+the spec. For example, the spec says what counts as a link
+destination, but it doesn't mandate that non-ASCII characters in
+the URL be percent-encoded. To use the automatic tests,
+implementers will need to provide a renderer that conforms to
+the expectations of the spec examples (percent-encoding
+non-ASCII characters in URLs). But a conforming implementation
+can use a different renderer and may choose not to
+percent-encode non-ASCII characters in URLs.
+
This document is generated from a text file, `spec.txt`, written
in Markdown with a small extension for the side-by-side tests.
The script `tools/makespec.py` can be used to convert `spec.txt` into
@@ -294,37 +304,31 @@ of [characters] rather than bytes. A conforming parser may be limited
to a certain encoding.
A [line](@) is a sequence of zero or more [characters]
-other than newline (`U+000A`) or carriage return (`U+000D`),
+other than line feed (`U+000A`) or carriage return (`U+000D`),
followed by a [line ending] or by the end of file.
-A [line ending](@) is a newline (`U+000A`), a carriage return
-(`U+000D`) not followed by a newline, or a carriage return and a
-following newline.
+A [line ending](@) is a line feed (`U+000A`), a carriage return
+(`U+000D`) not followed by a line feed, or a carriage return and a
+following line feed.
A line containing no characters, or a line containing only spaces
(`U+0020`) or tabs (`U+0009`), is called a [blank line](@).
The following definitions of character classes will be used in this spec:
-A [whitespace character](@) is a space
-(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`),
-form feed (`U+000C`), or carriage return (`U+000D`).
-
-[Whitespace](@) is a sequence of one or more [whitespace
-characters].
+A [Unicode whitespace character](@) is a character in the Unicode `Zs` general
+category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or
+carriage return (`U+000D`).
-A [Unicode whitespace character](@) is
-any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
-carriage return (`U+000D`), newline (`U+000A`), or form feed
-(`U+000C`).
+[Unicode whitespace](@) is a sequence of one or more
+[Unicode whitespace characters].
-[Unicode whitespace](@) is a sequence of one
-or more [Unicode whitespace characters].
+A [tab](@) is `U+0009`.
A [space](@) is `U+0020`.
-A [non-whitespace character](@) is any character
-that is not a [whitespace character].
+An [ASCII control character](@) is a character in the range `U+0000–1F`
+(inclusive) or `U+007F`.
An [ASCII punctuation character](@)
is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
@@ -333,14 +337,13 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060),
`{`, `|`, `}`, or `~` (U+007B–007E).
-A [punctuation character](@) is an [ASCII
-punctuation character] or anything in
-the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
+A [Unicode punctuation character](@) is a character in the Unicode `P`
+(punctuation) or `S` (symbol) general categories.
## Tabs
Tabs in lines are not expanded to [spaces]. However,
-in contexts where whitespace helps to define block structure,
+in contexts where spaces help to define block structure,
tabs behave as if they were replaced by spaces with a tab stop
of 4 characters.
@@ -478,3267 +481,3620 @@ bar
For security reasons, the Unicode character `U+0000` must be replaced
with the REPLACEMENT CHARACTER (`U+FFFD`).
-# Blocks and inlines
-
-We can think of a document as a sequence of
-[blocks](@)---structural elements like paragraphs, block
-quotations, lists, headings, rules, and code blocks. Some blocks (like
-block quotes and list items) contain other blocks; others (like
-headings and paragraphs) contain [inline](@) content---text,
-links, emphasized text, images, code spans, and so on.
-## Precedence
+## Backslash escapes
-Indicators of block structure always take precedence over indicators
-of inline structure. So, for example, the following is a list with
-two items, not a list with one item containing a code span:
+Any ASCII punctuation character may be backslash-escaped:
```````````````````````````````` example
-- `one
-- two`
+\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
.
-
-
`one
-
two`
-
+
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
````````````````````````````````
-This means that parsing can proceed in two steps: first, the block
-structure of the document can be discerned; second, text lines inside
-paragraphs, headings, and other block constructs can be parsed for inline
-structure. The second step requires information about link reference
-definitions that will be available only at the end of the first
-step. Note that the first step requires processing lines in sequence,
-but the second can be parallelized, since the inline parsing of
-one block element does not affect the inline parsing of any other.
-
-## Container blocks and leaf blocks
-
-We can divide blocks into two types:
-[container blocks](@),
-which can contain other blocks, and [leaf blocks](@),
-which cannot.
-
-# Leaf blocks
+Backslashes before other characters are treated as literal
+backslashes:
-This section describes the different kinds of leaf block that make up a
-Markdown document.
+```````````````````````````````` example
+\→\A\a\ \3\φ\«
+.
+
\→\A\a\ \3\φ\«
+````````````````````````````````
-## Thematic breaks
-A line consisting of 0-3 spaces of indentation, followed by a sequence
-of three or more matching `-`, `_`, or `*` characters, each followed
-optionally by any number of spaces or tabs, forms a
-[thematic break](@).
+Escaped characters are treated as regular characters and do
+not have their usual Markdown meanings:
```````````````````````````````` example
-***
----
-___
+\*not emphasized*
+\ not a tag
+\[not a link](/foo)
+\`not code`
+1\. not a list
+\* not a list
+\# not a heading
+\[foo]: /url "not a reference"
+\ö not a character entity
.
-
-
-
+
*not emphasized*
+<br/> not a tag
+[not a link](/foo)
+`not code`
+1. not a list
+* not a list
+# not a heading
+[foo]: /url "not a reference"
+ö not a character entity
````````````````````````````````
-Wrong characters:
+If a backslash is itself escaped, the following character is not:
```````````````````````````````` example
-+++
+\\*emphasis*
.
-
+++
+
\emphasis
````````````````````````````````
+A backslash at the end of the line is a [hard line break]:
+
```````````````````````````````` example
-===
+foo\
+bar
.
-
===
+
foo
+bar
````````````````````````````````
-Not enough characters:
+Backslash escapes do not work in code blocks, code spans, autolinks, or
+raw HTML:
```````````````````````````````` example
---
-**
-__
+`` \[\` ``
.
-
--
-**
-__
+
\[\`
````````````````````````````````
-One to three spaces indent are allowed:
-
```````````````````````````````` example
- ***
- ***
- ***
+ \[\]
.
-
-
-
+
\[\]
+
````````````````````````````````
-Four spaces is too many:
-
```````````````````````````````` example
- ***
+~~~
+\[\]
+~~~
.
-
***
+
\[\]
````````````````````````````````
```````````````````````````````` example
-Foo
- ***
+
.
-
````````````````````````````````
-It is required that all of the [non-whitespace characters] be the same.
-So, this is not a thematic break:
+[Decimal numeric character
+references](@)
+consist of `&#` + a string of 1--7 arabic digits + `;`. A
+numeric character reference is parsed as the corresponding
+Unicode character. Invalid Unicode code points will be replaced by
+the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons,
+the code point `U+0000` will also be replaced by `U+FFFD`.
```````````````````````````````` example
- *-*
+# Ӓ Ϡ
.
-
-
+
# Ӓ Ϡ �
````````````````````````````````
-Thematic breaks do not need blank lines before or after:
+[Hexadecimal numeric character
+references](@) consist of `&#` +
+either `X` or `x` + a string of 1-6 hexadecimal digits + `;`.
+They too are parsed as the corresponding Unicode character (this
+time specified with a hexadecimal numeral instead of decimal).
```````````````````````````````` example
-- foo
-***
-- bar
+" ആ ಫ
.
-
-
foo
-
-
-
-
bar
-
+
" ആ ಫ
````````````````````````````````
-Thematic breaks can interrupt a paragraph:
+Here are some nonentities:
```````````````````````````````` example
-Foo
-***
-bar
+  &x;
+
+abcdef0;
+&ThisIsNotDefined; &hi?;
.
-
````````````````````````````````
-When both a thematic break and a list item are possible
-interpretations of a line, the thematic break takes precedence:
+Strings that are not on the list of HTML5 named entities are not
+recognized as entity references either:
```````````````````````````````` example
-* Foo
-* * *
-* Bar
+&MadeUpEntity;
.
-
-
Foo
-
-
-
-
Bar
-
+
&MadeUpEntity;
````````````````````````````````
-If you want a thematic break in a list item, use a different bullet:
+Entity and numeric character references are recognized in any
+context besides code spans or code blocks, including
+URLs, [link titles], and [fenced code block][] [info strings]:
```````````````````````````````` example
-- Foo
-- * * *
+
.
-
````````````````````````````````
-More than six `#` characters is not a heading:
-
```````````````````````````````` example
-####### foo
+``` föö
+foo
+```
.
-
####### foo
+
foo
+
````````````````````````````````
-At least one space is required between the `#` characters and the
-heading's contents, unless the heading is empty. Note that many
-implementations currently do not require the space. However, the
-space was required by the
-[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
-and it helps prevent things like the following from being parsed as
-headings:
+Entity and numeric character references are treated as literal
+text in code spans and code blocks:
```````````````````````````````` example
-#5 bolt
-
-#hashtag
+`föö`
.
-
#5 bolt
-
#hashtag
+
föö
````````````````````````````````
-This is not a heading, because the first `#` is escaped:
-
```````````````````````````````` example
-\## foo
+ föfö
.
-
## foo
+
föfö
+
````````````````````````````````
-Contents are parsed as inlines:
+Entity and numeric character references cannot be used
+in place of symbols indicating structure in CommonMark
+documents.
```````````````````````````````` example
-# foo *bar* \*baz\*
+*foo*
+*foo*
.
-
foo bar *baz*
+
*foo*
+foo
````````````````````````````````
+```````````````````````````````` example
+* foo
-Leading and trailing [whitespace] is ignored in parsing inline content:
+* foo
+.
+
* foo
+
+
foo
+
+````````````````````````````````
```````````````````````````````` example
-# foo
+foo
bar
.
-
foo
+
foo
+
+bar
````````````````````````````````
+```````````````````````````````` example
+ foo
+.
+
→foo
+````````````````````````````````
-One to three spaces indentation are allowed:
```````````````````````````````` example
- ### foo
- ## foo
- # foo
+[a](url "tit")
.
-
foo
-
foo
-
foo
+
[a](url "tit")
````````````````````````````````
-Four spaces are too much:
+
+# Blocks and inlines
+
+We can think of a document as a sequence of
+[blocks](@)---structural elements like paragraphs, block
+quotations, lists, headings, rules, and code blocks. Some blocks (like
+block quotes and list items) contain other blocks; others (like
+headings and paragraphs) contain [inline](@) content---text,
+links, emphasized text, images, code spans, and so on.
+
+## Precedence
+
+Indicators of block structure always take precedence over indicators
+of inline structure. So, for example, the following is a list with
+two items, not a list with one item containing a code span:
```````````````````````````````` example
- # foo
+- `one
+- two`
.
-
# foo
-
+
+
`one
+
two`
+
````````````````````````````````
+This means that parsing can proceed in two steps: first, the block
+structure of the document can be discerned; second, text lines inside
+paragraphs, headings, and other block constructs can be parsed for inline
+structure. The second step requires information about link reference
+definitions that will be available only at the end of the first
+step. Note that the first step requires processing lines in sequence,
+but the second can be parallelized, since the inline parsing of
+one block element does not affect the inline parsing of any other.
+
+## Container blocks and leaf blocks
+
+We can divide blocks into two types:
+[container blocks](#container-blocks),
+which can contain other blocks, and [leaf blocks](#leaf-blocks),
+which cannot.
+
+# Leaf blocks
+
+This section describes the different kinds of leaf block that make up a
+Markdown document.
+
+## Thematic breaks
+
+A line consisting of optionally up to three spaces of indentation, followed by a
+sequence of three or more matching `-`, `_`, or `*` characters, each followed
+optionally by any number of spaces or tabs, forms a
+[thematic break](@).
+
```````````````````````````````` example
-foo
- # bar
+***
+---
+___
.
-
foo
-# bar
+
+
+
````````````````````````````````
-A closing sequence of `#` characters is optional:
+Wrong characters:
```````````````````````````````` example
-## foo ##
- ### bar ###
++++
.
-
foo
-
bar
+
+++
````````````````````````````````
-It need not be the same length as the opening sequence:
-
```````````````````````````````` example
-# foo ##################################
-##### foo ##
+===
.
-
foo
-
foo
+
===
````````````````````````````````
-Spaces are allowed after the closing sequence:
+Not enough characters:
```````````````````````````````` example
-### foo ###
+--
+**
+__
.
-
foo
+
--
+**
+__
````````````````````````````````
-A sequence of `#` characters with anything but [spaces] following it
-is not a closing sequence, but counts as part of the contents of the
-heading:
+Up to three spaces of indentation are allowed:
```````````````````````````````` example
-### foo ### b
+ ***
+ ***
+ ***
.
-
foo ### b
+
+
+
````````````````````````````````
-The closing sequence must be preceded by a space:
+Four spaces of indentation is too many:
```````````````````````````````` example
-# foo#
+ ***
.
-
foo#
+
***
+
````````````````````````````````
-Backslash-escaped `#` characters do not count as part
-of the closing sequence:
-
```````````````````````````````` example
-### foo \###
-## foo #\##
-# foo \#
+Foo
+ ***
.
-
foo ###
-
foo ###
-
foo #
+
Foo
+***
````````````````````````````````
-ATX headings need not be separated from surrounding content by blank
-lines, and they can interrupt paragraphs:
+More than three characters may be used:
```````````````````````````````` example
-****
-## foo
-****
+_____________________________________
.
-
foo
-
````````````````````````````````
+Spaces and tabs are allowed between the characters:
+
```````````````````````````````` example
-Foo bar
-# baz
-Bar foo
+ - - -
.
-
Foo bar
-
baz
-
Bar foo
+
````````````````````````````````
-ATX headings can be empty:
-
```````````````````````````````` example
-##
-#
-### ###
+ ** * ** * ** * **
.
-
-
-
+
````````````````````````````````
-## Setext headings
+```````````````````````````````` example
+- - - -
+.
+
+````````````````````````````````
-A [setext heading](@) consists of one or more
-lines of text, each containing at least one [non-whitespace
-character], with no more than 3 spaces indentation, followed by
-a [setext heading underline]. The lines of text must be such
-that, were they not followed by the setext heading underline,
-they would be interpreted as a paragraph: they cannot be
-interpretable as a [code fence], [ATX heading][ATX headings],
-[block quote][block quotes], [thematic break][thematic breaks],
-[list item][list items], or [HTML block][HTML blocks].
-A [setext heading underline](@) is a sequence of
-`=` characters or a sequence of `-` characters, with no more than 3
-spaces indentation and any number of trailing spaces. If a line
-containing a single `-` can be interpreted as an
-empty [list items], it should be interpreted this way
-and not as a [setext heading underline].
+Spaces and tabs are allowed at the end:
-The heading is a level 1 heading if `=` characters are used in
-the [setext heading underline], and a level 2 heading if `-`
-characters are used. The contents of the heading are the result
-of parsing the preceding lines of text as CommonMark inline
-content.
+```````````````````````````````` example
+- - - -
+.
+
+````````````````````````````````
-In general, a setext heading need not be preceded or followed by a
-blank line. However, it cannot interrupt a paragraph, so when a
-setext heading comes after a paragraph, a blank line is needed between
-them.
-Simple examples:
+However, no other characters may occur in the line:
```````````````````````````````` example
-Foo *bar*
-=========
+_ _ _ _ a
-Foo *bar*
----------
+a------
+
+---a---
.
-
Foo bar
-
Foo bar
+
_ _ _ _ a
+
a------
+
---a---
````````````````````````````````
-The content of the header may span more than one line:
+It is required that all of the characters other than spaces or tabs be the same.
+So, this is not a thematic break:
```````````````````````````````` example
-Foo *bar
-baz*
-====
+ *-*
.
-
Foo bar
-baz
+
-
````````````````````````````````
-The contents are the result of parsing the headings's raw
-content as inlines. The heading's raw content is formed by
-concatenating the lines and removing initial and final
-[whitespace].
+
+Thematic breaks do not need blank lines before or after:
```````````````````````````````` example
- Foo *bar
-baz*→
-====
+- foo
+***
+- bar
.
-
Foo bar
-baz
+
+
foo
+
+
+
+
bar
+
````````````````````````````````
-The underlining can be any length:
+Thematic breaks can interrupt a paragraph:
```````````````````````````````` example
Foo
--------------------------
-
-Foo
-=
+***
+bar
.
-
Foo
-
Foo
+
Foo
+
+
bar
````````````````````````````````
-The heading content can be indented up to three spaces, and need
-not line up with the underlining:
+If a line of dashes that meets the above conditions for being a
+thematic break could also be interpreted as the underline of a [setext
+heading], the interpretation as a
+[setext heading] takes precedence. Thus, for example,
+this is a setext heading, not a paragraph followed by a thematic break:
```````````````````````````````` example
- Foo
+Foo
---
-
- Foo
------
-
- Foo
- ===
+bar
.
Foo
-
Foo
-
Foo
+
bar
````````````````````````````````
-Four spaces indent is too much:
+When both a thematic break and a list item are possible
+interpretations of a line, the thematic break takes precedence:
```````````````````````````````` example
- Foo
- ---
-
- Foo
----
+* Foo
+* * *
+* Bar
.
-
Foo
----
+
+
Foo
+
+
+
+
Bar
+
+````````````````````````````````
-Foo
-
+
+If you want a thematic break in a list item, use a different bullet:
+
+```````````````````````````````` example
+- Foo
+- * * *
+.
+
+
Foo
+
+
+
````````````````````````````````
-The setext heading underline can be indented up to three spaces, and
-may have trailing spaces:
+## ATX headings
+
+An [ATX heading](@)
+consists of a string of characters, parsed as inline content, between an
+opening sequence of 1--6 unescaped `#` characters and an optional
+closing sequence of any number of unescaped `#` characters.
+The opening sequence of `#` characters must be followed by spaces or tabs, or
+by the end of line. The optional closing sequence of `#`s must be preceded by
+spaces or tabs and may be followed by spaces or tabs only. The opening
+`#` character may be preceded by up to three spaces of indentation. The raw
+contents of the heading are stripped of leading and trailing spaces or tabs
+before being parsed as inline content. The heading level is equal to the number
+of `#` characters in the opening sequence.
+
+Simple headings:
```````````````````````````````` example
-Foo
- ----
+# foo
+## foo
+### foo
+#### foo
+##### foo
+###### foo
.
-
Foo
+
foo
+
foo
+
foo
+
foo
+
foo
+
foo
````````````````````````````````
-Four spaces is too much:
+More than six `#` characters is not a heading:
```````````````````````````````` example
-Foo
- ---
+####### foo
.
-
Foo
----
+
####### foo
````````````````````````````````
-The setext heading underline cannot contain internal spaces:
+At least one space or tab is required between the `#` characters and the
+heading's contents, unless the heading is empty. Note that many
+implementations currently do not require the space. However, the
+space was required by the
+[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
+and it helps prevent things like the following from being parsed as
+headings:
```````````````````````````````` example
-Foo
-= =
+#5 bolt
-Foo
---- -
+#hashtag
.
-
Foo
-= =
-
Foo
-
+
#5 bolt
+
#hashtag
````````````````````````````````
-Trailing spaces in the content line do not cause a line break:
+This is not a heading, because the first `#` is escaped:
```````````````````````````````` example
-Foo
------
+\## foo
.
-
Foo
+
## foo
````````````````````````````````
-Nor does a backslash at the end:
+Contents are parsed as inlines:
```````````````````````````````` example
-Foo\
-----
+# foo *bar* \*baz\*
.
-
Foo\
+
foo bar *baz*
````````````````````````````````
-Since indicators of block structure take precedence over
-indicators of inline structure, the following are setext headings:
+Leading and trailing spaces or tabs are ignored in parsing inline content:
```````````````````````````````` example
-`Foo
-----
-`
-
-
+# foo
.
-
`Foo
-
`
-
<a title="a lot
-
of dashes"/>
+
foo
````````````````````````````````
-The setext heading underline cannot be a [lazy continuation
-line] in a list item or block quote:
+Up to three spaces of indentation are allowed:
```````````````````````````````` example
-> Foo
----
+ ### foo
+ ## foo
+ # foo
.
-
-
Foo
-
-
+
foo
+
foo
+
foo
````````````````````````````````
+Four spaces of indentation is too many:
+
```````````````````````````````` example
-> foo
-bar
-===
+ # foo
.
-
-
foo
-bar
-===
-
+
# foo
+
````````````````````````````````
```````````````````````````````` example
-- Foo
----
+foo
+ # bar
.
-
-
Foo
-
-
+
foo
+# bar
````````````````````````````````
-A blank line is needed between a paragraph and a following
-setext heading, since otherwise the paragraph becomes part
-of the heading's content:
+A closing sequence of `#` characters is optional:
```````````````````````````````` example
-Foo
-Bar
----
+## foo ##
+ ### bar ###
.
-
Foo
-Bar
+
foo
+
bar
````````````````````````````````
-But in general a blank line is not required before or after
-setext headings:
+It need not be the same length as the opening sequence:
```````````````````````````````` example
----
-Foo
----
-Bar
----
-Baz
+# foo ##################################
+##### foo ##
.
-
-
Foo
-
Bar
-
Baz
+
foo
+
foo
````````````````````````````````
-Setext headings cannot be empty:
+Spaces or tabs are allowed after the closing sequence:
```````````````````````````````` example
+### foo ###
+.
+
foo
+````````````````````````````````
-====
+
+A sequence of `#` characters with anything but spaces or tabs following it
+is not a closing sequence, but counts as part of the contents of the
+heading:
+
+```````````````````````````````` example
+### foo ### b
.
-
====
+
foo ### b
````````````````````````````````
-Setext heading text lines must not be interpretable as block
-constructs other than paragraphs. So, the line of dashes
-in these examples gets interpreted as a thematic break:
+The closing sequence must be preceded by a space or tab:
```````````````````````````````` example
----
----
+# foo#
.
-
-
+
foo#
````````````````````````````````
+Backslash-escaped `#` characters do not count as part
+of the closing sequence:
+
```````````````````````````````` example
-- foo
------
+### foo \###
+## foo #\##
+# foo \#
.
-
-
foo
-
-
+
foo ###
+
foo ###
+
foo #
````````````````````````````````
+ATX headings need not be separated from surrounding content by blank
+lines, and they can interrupt paragraphs:
+
```````````````````````````````` example
- foo
----
+****
+## foo
+****
.
-
foo
-
+
+
foo
````````````````````````````````
```````````````````````````````` example
-> foo
------
+Foo bar
+# baz
+Bar foo
.
-
-
foo
-
-
+
Foo bar
+
baz
+
Bar foo
````````````````````````````````
-If you want a heading with `> foo` as its literal text, you can
-use backslash escapes:
+ATX headings can be empty:
```````````````````````````````` example
-\> foo
-------
+##
+#
+### ###
.
-
> foo
+
+
+
````````````````````````````````
-**Compatibility note:** Most existing Markdown implementations
-do not allow the text of setext headings to span multiple lines.
-But there is no consensus about how to interpret
+## Setext headings
-``` markdown
-Foo
-bar
----
-baz
-```
+A [setext heading](@) consists of one or more
+lines of text, not interrupted by a blank line, of which the first line does not
+have more than 3 spaces of indentation, followed by
+a [setext heading underline]. The lines of text must be such
+that, were they not followed by the setext heading underline,
+they would be interpreted as a paragraph: they cannot be
+interpretable as a [code fence], [ATX heading][ATX headings],
+[block quote][block quotes], [thematic break][thematic breaks],
+[list item][list items], or [HTML block][HTML blocks].
-One can find four different interpretations:
+A [setext heading underline](@) is a sequence of
+`=` characters or a sequence of `-` characters, with no more than 3
+spaces of indentation and any number of trailing spaces or tabs.
-1. paragraph "Foo", heading "bar", paragraph "baz"
-2. paragraph "Foo bar", thematic break, paragraph "baz"
-3. paragraph "Foo bar --- baz"
-4. heading "Foo bar", paragraph "baz"
+The heading is a level 1 heading if `=` characters are used in
+the [setext heading underline], and a level 2 heading if `-`
+characters are used. The contents of the heading are the result
+of parsing the preceding lines of text as CommonMark inline
+content.
-We find interpretation 4 most natural, and interpretation 4
-increases the expressive power of CommonMark, by allowing
-multiline headings. Authors who want interpretation 1 can
-put a blank line after the first paragraph:
+In general, a setext heading need not be preceded or followed by a
+blank line. However, it cannot interrupt a paragraph, so when a
+setext heading comes after a paragraph, a blank line is needed between
+them.
+
+Simple examples:
```````````````````````````````` example
-Foo
+Foo *bar*
+=========
-bar
----
-baz
+Foo *bar*
+---------
.
-
Foo
-
bar
-
baz
+
Foo bar
+
Foo bar
````````````````````````````````
-Authors who want interpretation 2 can put blank lines around
-the thematic break,
+The content of the heading may span more than one line:
```````````````````````````````` example
-Foo
-bar
+Foo *bar
+baz*
+====
+.
+
Foo bar
+baz
+````````````````````````````````
----
+The contents are the result of parsing the heading's raw
+content as inlines. The heading's raw content is formed by
+concatenating the lines and removing initial and final
+spaces or tabs.
-baz
+```````````````````````````````` example
+ Foo *bar
+baz*→
+====
.
-
Foo
-bar
-
-
baz
+
Foo bar
+baz
````````````````````````````````
-or use a thematic break that cannot count as a [setext heading
-underline], such as
+The underlining can be any length:
```````````````````````````````` example
Foo
-bar
-* * *
-baz
+-------------------------
+
+Foo
+=
.
-
Foo
-bar
-
-
baz
+
Foo
+
Foo
````````````````````````````````
-Authors who want interpretation 3 can use backslash escapes:
+The heading content can be preceded by up to three spaces of indentation, and
+need not line up with the underlining:
```````````````````````````````` example
-Foo
-bar
-\---
-baz
-.
-
+````````````````````````````````
-An [indented code block](@) is composed of one or more
-[indented chunks] separated by blank lines.
-An [indented chunk](@) is a sequence of non-blank lines,
-each indented four or more spaces. The contents of the code block are
-the literal contents of the lines, including trailing
-[line endings], minus four spaces of indentation.
-An indented code block has no [info string].
-An indented code block cannot interrupt a paragraph, so there must be
-a blank line between a paragraph and a following indented code block.
-(A blank line is not needed, however, between a code block and a following
-paragraph.)
+Four spaces of indentation is too many:
```````````````````````````````` example
- a simple
- indented code block
+ Foo
+ ---
+
+ Foo
+---
.
-
a simple
- indented code block
+
Foo
+---
+
+Foo
+
````````````````````````````````
-If there is any ambiguity between an interpretation of indentation
-as a code block and as indicating that material belongs to a [list
-item][list items], the list item interpretation takes precedence:
+The setext heading underline can be preceded by up to three spaces of
+indentation, and may have trailing spaces or tabs:
```````````````````````````````` example
- - foo
-
- bar
+Foo
+ ----
.
-
-
-
foo
-
bar
-
-
+
Foo
````````````````````````````````
-```````````````````````````````` example
-1. foo
-
- - bar
-.
-
-
-
foo
-
-
bar
-
-
-
-````````````````````````````````
-
-
-
-The contents of a code block are literal text, and do not get parsed
-as Markdown:
+Four spaces of indentation is too many:
```````````````````````````````` example
-
- *hi*
-
- - one
+Foo
+ ---
.
-
<a/>
-*hi*
-
-- one
-
+
Foo
+---
````````````````````````````````
-Here we have three chunks separated by blank lines:
+The setext heading underline cannot contain internal spaces or tabs:
```````````````````````````````` example
- chunk1
+Foo
+= =
- chunk2
-
-
-
- chunk3
+Foo
+--- -
.
-
chunk1
-
-chunk2
-
-
-
-chunk3
-
+
Foo
+= =
+
Foo
+
````````````````````````````````
-Any initial spaces beyond four will be included in the content, even
-in interior blank lines:
+Trailing spaces or tabs in the content line do not cause a hard line break:
```````````````````````````````` example
- chunk1
-
- chunk2
+Foo
+-----
.
-
chunk1
-
- chunk2
-
+
Foo
````````````````````````````````
-An indented code block cannot interrupt a paragraph. (This
-allows hanging indents and the like.)
+Nor does a backslash at the end:
```````````````````````````````` example
-Foo
- bar
-
+Foo\
+----
.
-
Foo
-bar
+
Foo\
````````````````````````````````
-However, any non-blank line with fewer than four leading spaces ends
-the code block immediately. So a paragraph may occur immediately
-after indented code:
+Since indicators of block structure take precedence over
+indicators of inline structure, the following are setext headings:
```````````````````````````````` example
- foo
-bar
+`Foo
+----
+`
+
+
.
-
foo
-
-
bar
+
`Foo
+
`
+
<a title="a lot
+
of dashes"/>
````````````````````````````````
-And indented code can occur immediately before and after other kinds of
-blocks:
+The setext heading underline cannot be a [lazy continuation
+line] in a list item or block quote:
```````````````````````````````` example
-# Heading
- foo
-Heading
-------
- foo
-----
+> Foo
+---
.
-
Heading
-
foo
-
-
Heading
-
foo
-
+
+
Foo
+
````````````````````````````````
-The first line can be indented more than four spaces:
-
```````````````````````````````` example
- foo
- bar
+> foo
+bar
+===
.
-
foo
+
+
foo
bar
-
+===
+
````````````````````````````````
-Blank lines preceding or following an indented code block
-are not included in it:
-
```````````````````````````````` example
-
-
- foo
-
-
+- Foo
+---
.
-
foo
-
+
+
Foo
+
+
````````````````````````````````
-Trailing spaces are included in the code block's content:
+A blank line is needed between a paragraph and a following
+setext heading, since otherwise the paragraph becomes part
+of the heading's content:
```````````````````````````````` example
- foo
+Foo
+Bar
+---
.
-
foo
-
+
Foo
+Bar
````````````````````````````````
+But in general a blank line is not required before or after
+setext headings:
-## Fenced code blocks
-
-A [code fence](@) is a sequence
-of at least three consecutive backtick characters (`` ` ``) or
-tildes (`~`). (Tildes and backticks cannot be mixed.)
-A [fenced code block](@)
-begins with a code fence, indented no more than three spaces.
-
-The line with the opening code fence may optionally contain some text
-following the code fence; this is trimmed of leading and trailing
-whitespace and called the [info string](@). If the [info string] comes
-after a backtick fence, it may not contain any backtick
-characters. (The reason for this restriction is that otherwise
-some inline code would be incorrectly interpreted as the
-beginning of a fenced code block.)
-
-The content of the code block consists of all subsequent lines, until
-a closing [code fence] of the same type as the code block
-began with (backticks or tildes), and with at least as many backticks
-or tildes as the opening code fence. If the leading code fence is
-indented N spaces, then up to N spaces of indentation are removed from
-each line of the content (if present). (If a content line is not
-indented, it is preserved unchanged. If it is indented less than N
-spaces, all of the indentation is removed.)
-
-The closing code fence may be indented up to three spaces, and may be
-followed only by spaces, which are ignored. If the end of the
-containing block (or document) is reached and no closing code fence
-has been found, the code block contains all of the lines after the
-opening code fence until the end of the containing block (or
-document). (An alternative spec would require backtracking in the
-event that a closing code fence is not found. But this makes parsing
-much less efficient, and there seems to be no real down side to the
-behavior described here.)
-
-A fenced code block may interrupt a paragraph, and does not require
-a blank line either before or after.
+```````````````````````````````` example
+---
+Foo
+---
+Bar
+---
+Baz
+.
+
+
Foo
+
Bar
+
Baz
+````````````````````````````````
-The content of a code fence is treated as literal text, not parsed
-as inlines. The first word of the [info string] is typically used to
-specify the language of the code sample, and rendered in the `class`
-attribute of the `code` tag. However, this spec does not mandate any
-particular treatment of the [info string].
-Here is a simple example with backticks:
+Setext headings cannot be empty:
```````````````````````````````` example
-```
-<
- >
-```
+
+====
.
-
<
- >
-
+
====
````````````````````````````````
-With tildes:
+Setext heading text lines must not be interpretable as block
+constructs other than paragraphs. So, the line of dashes
+in these examples gets interpreted as a thematic break:
```````````````````````````````` example
-~~~
-<
- >
-~~~
+---
+---
.
-
<
- >
-
+
+
````````````````````````````````
-Fewer than three backticks is not enough:
```````````````````````````````` example
-``
-foo
-``
+- foo
+-----
.
-
foo
+
+
foo
+
+
````````````````````````````````
-The closing code fence must use the same character as the opening
-fence:
```````````````````````````````` example
-```
-aaa
-~~~
-```
+ foo
+---
.
-
+
````````````````````````````````
-The closing code fence must be at least as long as the opening fence:
+If you want a heading with `> foo` as its literal text, you can
+use backslash escapes:
```````````````````````````````` example
-````
-aaa
-```
-``````
+\> foo
+------
.
-
aaa
-```
-
+
> foo
````````````````````````````````
-```````````````````````````````` example
-~~~~
-aaa
-~~~
-~~~~
-.
-
aaa
-~~~
-
-````````````````````````````````
+**Compatibility note:** Most existing Markdown implementations
+do not allow the text of setext headings to span multiple lines.
+But there is no consensus about how to interpret
+``` markdown
+Foo
+bar
+---
+baz
+```
-Unclosed code blocks are closed by the end of the document
-(or the enclosing [block quote][block quotes] or [list item][list items]):
+One can find four different interpretations:
-```````````````````````````````` example
-```
-.
-
-````````````````````````````````
+1. paragraph "Foo", heading "bar", paragraph "baz"
+2. paragraph "Foo bar", thematic break, paragraph "baz"
+3. paragraph "Foo bar --- baz"
+4. heading "Foo bar", paragraph "baz"
+We find interpretation 4 most natural, and interpretation 4
+increases the expressive power of CommonMark, by allowing
+multiline headings. Authors who want interpretation 1 can
+put a blank line after the first paragraph:
```````````````````````````````` example
-`````
+Foo
-```
-aaa
+bar
+---
+baz
.
-
-```
-aaa
-
+
Foo
+
bar
+
baz
````````````````````````````````
+Authors who want interpretation 2 can put blank lines around
+the thematic break,
+
```````````````````````````````` example
-> ```
-> aaa
+Foo
+bar
-bbb
+---
+
+baz
.
-
-
aaa
-
-
-
bbb
+
Foo
+bar
+
+
baz
````````````````````````````````
-A code block can have all empty lines as its content:
+or use a thematic break that cannot count as a [setext heading
+underline], such as
```````````````````````````````` example
-```
-
-
-```
+Foo
+bar
+* * *
+baz
.
-
-
-
+
Foo
+bar
+
+
baz
````````````````````````````````
-A code block can be empty:
+Authors who want interpretation 3 can use backslash escapes:
```````````````````````````````` example
-```
-```
+Foo
+bar
+\---
+baz
.
-
+
Foo
+bar
+---
+baz
````````````````````````````````
-Fences can be indented. If the opening fence is indented,
-content lines will have equivalent opening indentation removed,
-if present:
+## Indented code blocks
+
+An [indented code block](@) is composed of one or more
+[indented chunks] separated by blank lines.
+An [indented chunk](@) is a sequence of non-blank lines,
+each preceded by four or more spaces of indentation. The contents of the code
+block are the literal contents of the lines, including trailing
+[line endings], minus four spaces of indentation.
+An indented code block has no [info string].
+
+An indented code block cannot interrupt a paragraph, so there must be
+a blank line between a paragraph and a following indented code block.
+(A blank line is not needed, however, between a code block and a following
+paragraph.)
```````````````````````````````` example
- ```
- aaa
-aaa
-```
+ a simple
+ indented code block
.
-
aaa
-aaa
+
a simple
+ indented code block
````````````````````````````````
+If there is any ambiguity between an interpretation of indentation
+as a code block and as indicating that material belongs to a [list
+item][list items], the list item interpretation takes precedence:
+
```````````````````````````````` example
- ```
-aaa
- aaa
-aaa
- ```
+ - foo
+
+ bar
.
-
+
````````````````````````````````
-Four spaces indentation produces an indented code block:
+
+The contents of a code block are literal text, and do not get parsed
+as Markdown:
```````````````````````````````` example
- ```
- aaa
- ```
+
+ *hi*
+
+ - one
.
-
```
-aaa
-```
+
<a/>
+*hi*
+
+- one
````````````````````````````````
-Closing fences may be indented by 0-3 spaces, and their indentation
-need not match that of the opening fence:
+Here we have three chunks separated by blank lines:
```````````````````````````````` example
-```
-aaa
- ```
+ chunk1
+
+ chunk2
+
+
+
+ chunk3
.
-
````````````````````````````````
-This is not a closing fence, because it is indented 4 spaces:
+Any initial spaces or tabs beyond four spaces of indentation will be included in
+the content, even in interior blank lines:
```````````````````````````````` example
-```
-aaa
- ```
+ chunk1
+
+ chunk2
.
-
aaa
- ```
+
chunk1
+
+ chunk2
````````````````````````````````
-
-Code fences (opening and closing) cannot contain internal spaces:
+An indented code block cannot interrupt a paragraph. (This
+allows hanging indents and the like.)
```````````````````````````````` example
-``` ```
-aaa
+Foo
+ bar
+
.
-
-aaa
+
Foo
+bar
````````````````````````````````
+However, any non-blank line with fewer than four spaces of indentation ends
+the code block immediately. So a paragraph may occur immediately
+after indented code:
+
```````````````````````````````` example
-~~~~~~
-aaa
-~~~ ~~
+ foo
+bar
.
-
aaa
-~~~ ~~
+
foo
+
bar
````````````````````````````````
-Fenced code blocks can interrupt paragraphs, and can be followed
-directly by paragraphs, without a blank line between:
+And indented code can occur immediately before and after other kinds of
+blocks:
```````````````````````````````` example
-foo
-```
-bar
-```
-baz
+# Heading
+ foo
+Heading
+------
+ foo
+----
.
-
foo
-
bar
+
Heading
+
foo
-
baz
+
Heading
+
foo
+
+
````````````````````````````````
-Other blocks can also occur before and after fenced code blocks
-without an intervening blank line:
+The first line can be preceded by more than four spaces of indentation:
```````````````````````````````` example
-foo
----
-~~~
-bar
-~~~
-# baz
+ foo
+ bar
.
-
foo
-
bar
+
foo
+bar
-
baz
````````````````````````````````
-An [info string] can be provided after the opening code fence.
-Although this spec doesn't mandate any particular treatment of
-the info string, the first word is typically used to specify
-the language of the code block. In HTML output, the language is
-normally indicated by adding a class to the `code` element consisting
-of `language-` followed by the language name.
+Blank lines preceding or following an indented code block
+are not included in it:
```````````````````````````````` example
-```ruby
-def foo(x)
- return 3
-end
-```
+
+
+ foo
+
+
.
-
def foo(x)
- return 3
-end
+
foo
````````````````````````````````
+Trailing spaces or tabs are included in the code block's content:
+
```````````````````````````````` example
-~~~~ ruby startline=3 $%@#$
-def foo(x)
- return 3
-end
-~~~~~~~
+ foo
.
-
def foo(x)
- return 3
-end
+
foo
````````````````````````````````
-```````````````````````````````` example
-````;
-````
-.
-
-````````````````````````````````
+## Fenced code blocks
-[Info strings] for backtick code blocks cannot contain backticks:
+A [code fence](@) is a sequence
+of at least three consecutive backtick characters (`` ` ``) or
+tildes (`~`). (Tildes and backticks cannot be mixed.)
+A [fenced code block](@)
+begins with a code fence, preceded by up to three spaces of indentation.
+
+The line with the opening code fence may optionally contain some text
+following the code fence; this is trimmed of leading and trailing
+spaces or tabs and called the [info string](@). If the [info string] comes
+after a backtick fence, it may not contain any backtick
+characters. (The reason for this restriction is that otherwise
+some inline code would be incorrectly interpreted as the
+beginning of a fenced code block.)
+
+The content of the code block consists of all subsequent lines, until
+a closing [code fence] of the same type as the code block
+began with (backticks or tildes), and with at least as many backticks
+or tildes as the opening code fence. If the leading code fence is
+preceded by N spaces of indentation, then up to N spaces of indentation are
+removed from each line of the content (if present). (If a content line is not
+indented, it is preserved unchanged. If it is indented N spaces or less, all
+of the indentation is removed.)
+
+The closing code fence may be preceded by up to three spaces of indentation, and
+may be followed only by spaces or tabs, which are ignored. If the end of the
+containing block (or document) is reached and no closing code fence
+has been found, the code block contains all of the lines after the
+opening code fence until the end of the containing block (or
+document). (An alternative spec would require backtracking in the
+event that a closing code fence is not found. But this makes parsing
+much less efficient, and there seems to be no real downside to the
+behavior described here.)
+
+A fenced code block may interrupt a paragraph, and does not require
+a blank line either before or after.
+
+The content of a code fence is treated as literal text, not parsed
+as inlines. The first word of the [info string] is typically used to
+specify the language of the code sample, and rendered in the `class`
+attribute of the `code` tag. However, this spec does not mandate any
+particular treatment of the [info string].
+
+Here is a simple example with backticks:
```````````````````````````````` example
-``` aa ```
-foo
+```
+<
+ >
+```
.
-
aa
-foo
+
<
+ >
+
````````````````````````````````
-[Info strings] for tilde code blocks can contain backticks and tildes:
+With tildes:
```````````````````````````````` example
-~~~ aa ``` ~~~
-foo
+~~~
+<
+ >
~~~
.
-
foo
+
<
+ >
````````````````````````````````
+Fewer than three backticks is not enough:
+
+```````````````````````````````` example
+``
+foo
+``
+.
+
foo
+````````````````````````````````
-Closing code fences cannot have [info strings]:
+The closing code fence must use the same character as the opening
+fence:
```````````````````````````````` example
```
-``` aaa
+aaa
+~~~
```
.
-
``` aaa
+
aaa
+~~~
````````````````````````````````
+```````````````````````````````` example
+~~~
+aaa
+```
+~~~
+.
+
aaa
+```
+
+````````````````````````````````
-## HTML blocks
-An [HTML block](@) is a group of lines that is treated
-as raw HTML (and will not be escaped in HTML output).
+The closing code fence must be at least as long as the opening fence:
-There are seven kinds of [HTML block], which can be defined by their
-start and end conditions. The block begins with a line that meets a
-[start condition](@) (after up to three spaces optional indentation).
-It ends with the first subsequent line that meets a matching [end
-condition](@), or the last line of the document, or the last line of
-the [container block](#container-blocks) containing the current HTML
-block, if no line is encountered that meets the [end condition]. If
-the first line meets both the [start condition] and the [end
-condition], the block will contain just that line.
+```````````````````````````````` example
+````
+aaa
+```
+``````
+.
+
aaa
+```
+
+````````````````````````````````
-1. **Start condition:** line begins with the string ``, `
`, or `` (case-insensitive; it
-need not match the start tag).
-2. **Start condition:** line begins with the string ``.
+```````````````````````````````` example
+~~~~
+aaa
+~~~
+~~~~
+.
+
aaa
+~~~
+
+````````````````````````````````
-3. **Start condition:** line begins with the string ``.\
-**End condition:** line contains the string `?>`.
-4. **Start condition:** line begins with the string ``.
+Unclosed code blocks are closed by the end of the document
+(or the enclosing [block quote][block quotes] or [list item][list items]):
-5. **Start condition:** line begins with the string
-``.
+```````````````````````````````` example
+```
+.
+
+````````````````````````````````
-6. **Start condition:** line begins the string `<` or ``
-followed by one of the strings (case-insensitive) `address`,
-`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
-`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
-`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
-`footer`, `form`, `frame`, `frameset`,
-`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
-`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
-`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
-`section`, `source`, `summary`, `table`, `tbody`, `td`,
-`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
-by [whitespace], the end of the line, the string `>`, or
-the string `/>`.\
-**End condition:** line is followed by a [blank line].
-7. **Start condition:** line begins with a complete [open tag]
-(with any [tag name] other than `script`,
-`style`, or `pre`) or a complete [closing tag],
-followed only by [whitespace] or the end of the line.\
-**End condition:** line is followed by a [blank line].
+```````````````````````````````` example
+`````
-HTML blocks continue until they are closed by their appropriate
-[end condition], or the last line of the document or other [container
-block](#container-blocks). This means any HTML **within an HTML
-block** that might otherwise be recognised as a start condition will
-be ignored by the parser and passed through as-is, without changing
-the parser's state.
+```
+aaa
+.
+
` will not affect
-the parser state; as the HTML block was started in by start condition 6, it
-will end at any blank line. This can be surprising:
```````````````````````````````` example
-
-
-**Hello**,
+> ```
+> aaa
-_world_.
-
-
+bbb
.
-
-
-**Hello**,
-
world.
-
-
+
+
aaa
+
+
+
bbb
````````````````````````````````
-In this case, the HTML block is terminated by the newline — the `**Hello**`
-text remains verbatim — and regular parsing resumes, with a paragraph,
-emphasised `world` and inline and block HTML following.
-
-All types of [HTML blocks] except type 7 may interrupt
-a paragraph. Blocks of type 7 may not interrupt a paragraph.
-(This restriction is intended to prevent unwanted interpretation
-of long tags inside a wrapped paragraph as starting HTML blocks.)
-Some simple examples follow. Here are some basic HTML blocks
-of type 6:
+A code block can have all empty lines as its content:
```````````````````````````````` example
-
-
-
- hi
-
-
-
-
-okay.
-.
-
-
-
- hi
-
-
-
-
okay.
-````````````````````````````````
-
+```
-```````````````````````````````` example
-
````````````````````````````````
-Here we have two HTML blocks with a Markdown paragraph between them:
+Fences can be indented. If the opening fence is indented,
+content lines will have equivalent opening indentation removed,
+if present:
```````````````````````````````` example
-
-
-*Markdown*
-
-
+ ```
+ aaa
+aaa
+```
.
-
-
Markdown
-
+
aaa
+aaa
+
````````````````````````````````
-The tag on the first line can be partial, as long
-as it is split where there would be whitespace:
-
```````````````````````````````` example
-
-
+ ```
+aaa
+ aaa
+aaa
+ ```
.
-
-
+
aaa
+aaa
+aaa
+
````````````````````````````````
```````````````````````````````` example
-
-
+ ```
+ aaa
+ aaa
+ aaa
+ ```
.
-
-
+
aaa
+ aaa
+aaa
+
````````````````````````````````
-An open tag need not be closed:
-```````````````````````````````` example
-
-*foo*
+Four spaces of indentation is too many:
-*bar*
+```````````````````````````````` example
+ ```
+ aaa
+ ```
.
-
-*foo*
-
bar
+
```
+aaa
+```
+
````````````````````````````````
-
-A partial tag need not even be completed (garbage
-in, garbage out):
+Closing fences may be preceded by up to three spaces of indentation, and their
+indentation need not match that of the opening fence:
```````````````````````````````` example
-
aaa
+
````````````````````````````````
```````````````````````````````` example
-
aaa
+
````````````````````````````````
-The initial tag doesn't even need to be a valid
-tag, as long as it starts like one:
+This is not a closing fence, because it is indented 4 spaces:
```````````````````````````````` example
-
aaa
+ ```
+
````````````````````````````````
-In type 6 blocks, the initial tag need not be on a line by
-itself:
+
+Code fences (opening and closing) cannot contain internal spaces or tabs:
```````````````````````````````` example
-
````````````````````````````````
```````````````````````````````` example
-
-foo
-
+~~~~~~
+aaa
+~~~ ~~
.
-
-foo
-
+
aaa
+~~~ ~~
+
````````````````````````````````
-Everything until the next blank line or end of document
-gets included in the HTML block. So, in the following
-example, what looks like a Markdown code block
-is actually part of the HTML block, which continues until a blank
-line or the end of the document is reached:
+Fenced code blocks can interrupt paragraphs, and can be followed
+directly by paragraphs, without a blank line between:
```````````````````````````````` example
-
-``` c
-int x = 33;
+foo
```
-.
-
-``` c
-int x = 33;
+bar
```
+baz
+.
+
foo
+
bar
+
+
baz
````````````````````````````````
-To start an [HTML block] with a tag that is *not* in the
-list of block-level tags in (6), you must put the tag by
-itself on the first line (and it must be complete):
+Other blocks can also occur before and after fenced code blocks
+without an intervening blank line:
```````````````````````````````` example
-
-*bar*
-
+foo
+---
+~~~
+bar
+~~~
+# baz
.
-
-*bar*
-
+
foo
+
bar
+
+
baz
````````````````````````````````
-In type 7 blocks, the [tag name] can be anything:
+An [info string] can be provided after the opening code fence.
+Although this spec doesn't mandate any particular treatment of
+the info string, the first word is typically used to specify
+the language of the code block. In HTML output, the language is
+normally indicated by adding a class to the `code` element consisting
+of `language-` followed by the language name.
```````````````````````````````` example
-
-*bar*
-
+```ruby
+def foo(x)
+ return 3
+end
+```
.
-
-*bar*
-
+
````````````````````````````````
-These rules are designed to allow us to work with tags that
-can function as either block-level or inline-level tags.
-The `<del>` tag is a nice example. We can surround content with
-`<del>` tags in three different ways. In this case, we get a raw
-HTML block, because the `<del>` tag is on a line by itself:
+[Info strings] for backtick code blocks cannot contain backticks:
```````````````````````````````` example
-
-*foo*
-
+``` aa ```
+foo
.
-
-*foo*
-
+
aa
+foo
````````````````````````````````
-In this case, we get a raw HTML block that just includes
-the `<del>` tag (because it ends with the following blank
-line). So the contents get interpreted as CommonMark:
+[Info strings] for tilde code blocks can contain backticks and tildes:
```````````````````````````````` example
-
-
-*foo*
-
-
+~~~ aa ``` ~~~
+foo
+~~~
.
-
-
foo
-
+
foo
+
````````````````````````````````
-Finally, in this case, the `<del>` tags are interpreted
-as [raw HTML] *inside* the CommonMark paragraph. (Because
-the tag is not on a line by itself, we get inline HTML
-rather than an [HTML block].)
+Closing code fences cannot have [info strings]:
```````````````````````````````` example
-*foo*
+```
+``` aaa
+```
.
-
foo
+
``` aaa
+
````````````````````````````````
-HTML tags designed to contain literal content
-(`script`, `style`, `pre`), comments, processing instructions,
-and declarations are treated somewhat differently.
-Instead of ending at the first blank line, these blocks
-end at the first line containing a corresponding end tag.
-As a result, these blocks can contain blank lines:
-A pre tag (type 1):
+## HTML blocks
-```````````````````````````````` example
-
-import Text.HTML.TagSoup
+An [HTML block](@) is a group of lines that is treated
+as raw HTML (and will not be escaped in HTML output).
-main :: IO ()
-main = print $ parseTags tags
-
-okay
-.
-
-import Text.HTML.TagSoup
+There are seven kinds of [HTML block], which can be defined by their
+start and end conditions. The block begins with a line that meets a
+[start condition](@) (after up to three optional spaces of indentation).
+It ends with the first subsequent line that meets a matching
+[end condition](@), or the last line of the document, or the last line of
+the [container block](#container-blocks) containing the current HTML
+block, if no line is encountered that meets the [end condition]. If
+the first line meets both the [start condition] and the [end
+condition], the block will contain just that line.
-main :: IO ()
-main = print $ parseTags tags
-
-
okay
-````````````````````````````````
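+1. **Start condition:** line begins with the string `<pre`,
+`<script`, `<style`, or `<textarea` (case-insensitive), followed by a space,
+a tab, the string `>`, or the end of the line.\
+**End condition:** line contains an end tag
+`</pre>`, `</script>`, `</style>`, or `</textarea>` (case-insensitive; it
+need not match the start tag).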
+2. **Start condition:** line begins with the string `<!--`.\
+**End condition:** line contains the string `-->`.
-A script tag (type 1):
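+3. **Start condition:** line begins with the string `<?`.\
+**End condition:** line contains the string `?>`.
+4. **Start condition:** line begins with the string `<!`
+followed by an ASCII letter.\
+**End condition:** line contains the character `>`.
+5. **Start condition:** line begins with the string
+`<![CDATA[`.\
+**End condition:** line contains the string `]]>`.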
-```````````````````````````````` example
-
-okay
-.
-
-
okay
-````````````````````````````````
+6. **Start condition:** line begins with the string `<` or `</`
+followed by one of the strings (case-insensitive) `address`,
+`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
+`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
+`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
+`footer`, `form`, `frame`, `frameset`,
+`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
+`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
+`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
+`search`, `section`, `summary`, `table`, `tbody`, `td`,
+`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
+by a space, a tab, the end of the line, the string `>`, or
+the string `/>`.\
+**End condition:** line is followed by a [blank line].
+7. **Start condition:** line begins with a complete [open tag]
+(with any [tag name] other than `pre`, `script`,
+`style`, or `textarea`) or a complete [closing tag],
+followed by zero or more spaces and tabs, followed by the end of the line.\
+**End condition:** line is followed by a [blank line].
-A style tag (type 1):
+HTML blocks continue until they are closed by their appropriate
+[end condition], or the last line of the document or other [container
+block](#container-blocks). This means any HTML **within an HTML
+block** that might otherwise be recognised as a start condition will
+be ignored by the parser and passed through as-is, without changing
+the parser's state.
+
+For instance, `<pre>` within an HTML block started by `<table>` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
```````````````````````````````` example
-
-okay
+_world_.
+
+
.
-
-
okay
+
+
+**Hello**,
+
world.
+
+
````````````````````````````````
+In this case, the HTML block is terminated by the blank line — the `**Hello**`
+text remains verbatim — and regular parsing resumes, with a paragraph,
+emphasised `world` and inline and block HTML following.
-If there is no matching end tag, the block will end at the
-end of the document (or the enclosing [block quote][block quotes]
-or [list item][list items]):
-
-```````````````````````````````` example
-
+
*foo*
.
-
-
foo
+
+*foo*
````````````````````````````````
+Here we have two HTML blocks with a Markdown paragraph between them:
+
```````````````````````````````` example
-*bar*
-*baz*
+
+
+*Markdown*
+
+
.
-*bar*
-
baz
+
+
Markdown
+
````````````````````````````````
-Note that anything on the last line after the
-end tag will be included in the [HTML block]:
+The tag on the first line can be partial, as long
+as it is split where there would be whitespace:
```````````````````````````````` example
-1. *bar*
+
+
.
-1. *bar*
+
+
````````````````````````````````
-A comment (type 2):
+```````````````````````````````` example
+
+
+.
+
+
+````````````````````````````````
+
+An open tag need not be closed:
```````````````````````````````` example
-
-okay
+*bar*
.
-
-
okay
+
+*foo*
+
bar
````````````````````````````````
-A processing instruction (type 3):
+A partial tag need not even be completed (garbage
+in, garbage out):
```````````````````````````````` example
-';
-
-?>
-okay
+
````````````````````````````````
```````````````````````````````` example
-
-
-
+
+foo
+
.
-
-
<div>
-
+
+foo
+
````````````````````````````````
-An HTML block of types 1--6 can interrupt a paragraph, and need not be
-preceded by a blank line.
+Everything until the next blank line or end of document
+gets included in the HTML block. So, in the following
+example, what looks like a Markdown code block
+is actually part of the HTML block, which continues until a blank
+line or the end of the document is reached:
```````````````````````````````` example
-Foo
-
-bar
-
+
+``` c
+int x = 33;
+```
.
-
Foo
-
-bar
-
+
+``` c
+int x = 33;
+```
````````````````````````````````
-However, a following blank line is needed, except at the end of
-a document, and except for blocks of types 1--5, [above][HTML
-block]:
+To start an [HTML block] with a tag that is *not* in the
+list of block-level tags in (6), you must put the tag by
+itself on the first line (and it must be complete):
```````````````````````````````` example
-
-*foo*
+
+*bar*
+
````````````````````````````````
-HTML blocks of type 7 cannot interrupt a paragraph:
+In type 7 blocks, the [tag name] can be anything:
```````````````````````````````` example
-Foo
-
-baz
+
+*bar*
+
.
-
+
+*bar*
+
````````````````````````````````
-This rule differs from John Gruber's original Markdown syntax
-specification, which says:
+```````````````````````````````` example
+
+*bar*
+
+.
+
+*bar*
+
+````````````````````````````````
-> The only restrictions are that block-level HTML elements —
-> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
-> surrounding content by blank lines, and the start and end tags of the
-> block should not be indented with tabs or spaces.
-In some ways Gruber's rule is more restrictive than the one given
-here:
+```````````````````````````````` example
+
+*bar*
+.
+
+*bar*
+````````````````````````````````
-- It requires that an HTML block be preceded by a blank line.
-- It does not allow the start tag to be indented.
-- It requires a matching end tag, which it also does not allow to
- be indented.
-Most Markdown implementations (including some of Gruber's own) do not
-respect all of these restrictions.
+These rules are designed to allow us to work with tags that
+can function as either block-level or inline-level tags.
+The `<del>` tag is a nice example. We can surround content with
+`<del>` tags in three different ways. In this case, we get a raw
+HTML block, because the `<del>` tag is on a line by itself:
-There is one respect, however, in which Gruber's rule is more liberal
-than the one given here, since it allows blank lines to occur inside
-an HTML block. There are two reasons for disallowing them here.
-First, it removes the need to parse balanced tags, which is
-expensive and can require backtracking from the end of the document
-if no matching end tag is found. Second, it provides a very simple
-and flexible way of including Markdown content inside HTML tags:
-simply separate the Markdown from the HTML using blank lines:
+```````````````````````````````` example
+
+*foo*
+
+.
+
+*foo*
+
+````````````````````````````````
-Compare:
+
+In this case, we get a raw HTML block that just includes
+the `<del>` tag (because it ends with the following blank
+line). So the contents get interpreted as CommonMark:
```````````````````````````````` example
-
+
-*Emphasized* text.
+*foo*
-
+
.
-
-
Emphasized text.
-
+
+
foo
+
````````````````````````````````
+Finally, in this case, the `<del>` tags are interpreted
+as [raw HTML] *inside* the CommonMark paragraph. (Because
+the tag is not on a line by itself, we get inline HTML
+rather than an [HTML block].)
+
```````````````````````````````` example
-
-*Emphasized* text.
-
+*foo*
.
-
-*Emphasized* text.
-
+
foo
````````````````````````````````
-Some Markdown implementations have adopted a convention of
-interpreting content inside tags as text if the open tag has
-the attribute `markdown=1`. The rule given above seems a simpler and
-more elegant way of achieving the same expressive power, which is also
-much simpler to parse.
+HTML tags designed to contain literal content
+(`pre`, `script`, `style`, `textarea`), comments, processing instructions,
+and declarations are treated somewhat differently.
+Instead of ending at the first blank line, these blocks
+end at the first line containing a corresponding end tag.
+As a result, these blocks can contain blank lines:
-The main potential drawback is that one can no longer paste HTML
-blocks into Markdown documents with 100% reliability. However,
-*in most cases* this will work fine, because the blank lines in
-HTML are usually followed by HTML block tags. For example:
+A pre tag (type 1):
```````````````````````````````` example
-
+A script tag (type 1):
+
+```````````````````````````````` example
+
+okay
.
-
-
-
-Hi
-
-
-
+
+
okay
````````````````````````````````
-There are problems, however, if the inner tags are indented
-*and* separated by spaces, as then they will be interpreted as
-an indented code block:
+A textarea tag (type 1):
```````````````````````````````` example
-
+
+
.
-
-
-
<td>
- Hi
-</td>
-
-
-
-````````````````````````````````
-
+
+````````````````````````````````
-A [link reference definition]
-does not correspond to a structural element of a document. Instead, it
-defines a label which can be used in [reference links]
-and reference-style [images] elsewhere in the document. [Link
-reference definitions] can come either before or after the links that use
-them.
+A style tag (type 1):
```````````````````````````````` example
-[foo]: /url "title"
+
+okay
.
-
````````````````````````````````
+If there is no matching end tag, the block will end at the
+end of the document (or the enclosing [block quote][block quotes]
+or [list item][list items]):
+
```````````````````````````````` example
- [foo]:
- /url
- 'the title'
+
+*foo*
.
-
````````````````````````````````
-However, it may not contain a [blank line]:
-
```````````````````````````````` example
-[foo]: /url 'title
-
-with blank line'
-
-[foo]
+*bar*
+*baz*
.
-
[foo]: /url 'title
-
with blank line'
-
[foo]
+*bar*
+
baz
````````````````````````````````
-The title may be omitted:
+Note that anything on the last line after the
+end tag will be included in the [HTML block]:
```````````````````````````````` example
-[foo]:
-/url
-
-[foo]
+1. *bar*
.
-
+1. *bar*
````````````````````````````````
-The link destination may not be omitted:
+A comment (type 2):
```````````````````````````````` example
-[foo]:
+
+okay
.
-
[foo]:
-
[foo]
-````````````````````````````````
+
+
okay
+````````````````````````````````
-```````````````````````````````` example
-[foo]: <>
-[foo]
-.
-
-````````````````````````````````
-The title must be separated from the link destination by
-whitespace:
+A processing instruction (type 3):
```````````````````````````````` example
-[foo]: (baz)
+';
+
+?>
+okay
.
-
[foo]: (baz)
-
[foo]
+';
+
+?>
+
okay
````````````````````````````````
-Both title and destination can contain backslash escapes
-and literal backslashes:
+A declaration (type 4):
```````````````````````````````` example
-[foo]: /url\bar\*baz "foo\"bar\baz"
-
-[foo]
+
.
-
+
````````````````````````````````
-A link can come before its corresponding definition:
+CDATA (type 5):
```````````````````````````````` example
-[foo]
-
-[foo]: url
-.
-
````````````````````````````````
-As noted in the section on [Links], matching of labels is
-case-insensitive (see [matches]).
+The opening tag can be preceded by up to three spaces of indentation, but not
+four:
```````````````````````````````` example
-[FOO]: /url
+
-[Foo]
+
.
-
````````````````````````````````
-Here is a link reference definition with no corresponding link.
-It contributes nothing to the document.
+An HTML block of types 1--6 can interrupt a paragraph, and need not be
+preceded by a blank line.
```````````````````````````````` example
-[foo]: /url
+Foo
+
+bar
+
.
+
Foo
+
+bar
+
````````````````````````````````
-Here is another one:
+However, a following blank line is needed, except at the end of
+a document, and except for blocks of types 1--5, [above][HTML
+block]:
```````````````````````````````` example
-[
-foo
-]: /url
+
bar
+
+*foo*
.
-
bar
+
+bar
+
+*foo*
````````````````````````````````
-This is not a link reference definition, because there are
-[non-whitespace characters] after the title:
+HTML blocks of type 7 cannot interrupt a paragraph:
```````````````````````````````` example
-[foo]: /url "title" ok
+Foo
+
+baz
.
-
````````````````````````````````
-This is a link reference definition, but it has no title:
-
-```````````````````````````````` example
-[foo]: /url
-"title" ok
-.
-
"title" ok
-````````````````````````````````
+This rule differs from John Gruber's original Markdown syntax
+specification, which says:
+> The only restrictions are that block-level HTML elements —
+> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
+> surrounding content by blank lines, and the start and end tags of the
+> block should not be indented with spaces or tabs.
-This is not a link reference definition, because it is indented
-four spaces:
+In some ways Gruber's rule is more restrictive than the one given
+here:
-```````````````````````````````` example
- [foo]: /url "title"
+- It requires that an HTML block be preceded by a blank line.
+- It does not allow the start tag to be indented.
+- It requires a matching end tag, which it also does not allow to
+ be indented.
-[foo]
-.
-
[foo]: /url "title"
-
-
[foo]
-````````````````````````````````
+Most Markdown implementations (including some of Gruber's own) do not
+respect all of these restrictions.
+There is one respect, however, in which Gruber's rule is more liberal
+than the one given here, since it allows blank lines to occur inside
+an HTML block. There are two reasons for disallowing them here.
+First, it removes the need to parse balanced tags, which is
+expensive and can require backtracking from the end of the document
+if no matching end tag is found. Second, it provides a very simple
+and flexible way of including Markdown content inside HTML tags:
+simply separate the Markdown from the HTML using blank lines:
-This is not a link reference definition, because it occurs inside
-a code block:
+Compare:
```````````````````````````````` example
-```
-[foo]: /url
-```
+
-[foo]
+*Emphasized* text.
+
+
.
-
[foo]: /url
-
-
[foo]
+
+
Emphasized text.
+
````````````````````````````````
-A [link reference definition] cannot interrupt a paragraph.
-
```````````````````````````````` example
-Foo
-[bar]: /baz
-
-[bar]
+
+*Emphasized* text.
+
.
-
Foo
-[bar]: /baz
-
[bar]
+
+*Emphasized* text.
+
````````````````````````````````
-However, it can directly follow other block elements, such as headings
-and thematic breaks, and it need not be followed by a blank line.
-
-```````````````````````````````` example
-# [Foo]
-[foo]: /url
-> bar
-.
-
-````````````````````````````````
+Some Markdown implementations have adopted a convention of
+interpreting content inside tags as text if the open tag has
+the attribute `markdown=1`. The rule given above seems a simpler and
+more elegant way of achieving the same expressive power, which is also
+much simpler to parse.
-```````````````````````````````` example
-[foo]: /url
-bar
-===
-[foo]
-.
-
-````````````````````````````````
+The main potential drawback is that one can no longer paste HTML
+blocks into Markdown documents with 100% reliability. However,
+*in most cases* this will work fine, because the blank lines in
+HTML are usually followed by HTML block tags. For example:
```````````````````````````````` example
-[foo]: /url
-===
-[foo]
-.
-
````````````````````````````````
-[Link reference definitions] can occur
-inside block containers, like lists and block quotations. They
-affect the entire document, not just the container in which they
-are defined:
+There are problems, however, if the inner tags are indented
+*and* separated by spaces, as then they will be interpreted as
+an indented code block:
```````````````````````````````` example
-[foo]
+
-Whether something is a [link reference definition] is
-independent of whether the link reference it defines is
-used in the document. Thus, for example, the following
-document contains just a link reference definition, and
-no visible content:
+
-```````````````````````````````` example
-[foo]: /url
+
.
-````````````````````````````````
+
+
+
<td>
+ Hi
+</td>
+
+
+
+````````````````````````````````
-## Paragraphs
+Fortunately, blank lines are usually not necessary and can be
+deleted. The exception is inside `<pre>` tags, but as described
+[above][HTML blocks], raw HTML blocks starting with `<pre>`
+*can* contain blank lines.
-A sequence of non-blank lines that cannot be interpreted as other
-kinds of blocks forms a [paragraph](@).
-The contents of the paragraph are the result of parsing the
-paragraph's raw content as inlines. The paragraph's raw content
-is formed by concatenating the lines and removing initial and final
-[whitespace].
+## Link reference definitions
-A simple example with two paragraphs:
+A [link reference definition](@)
+consists of a [link label], optionally preceded by up to three spaces of
+indentation, followed
+by a colon (`:`), optional spaces or tabs (including up to one
+[line ending]), a [link destination],
+optional spaces or tabs (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by spaces or tabs.
+No further character may occur.
+
+A [link reference definition]
+does not correspond to a structural element of a document. Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document. [Link
+reference definitions] can come either before or after the links that use
+them.
```````````````````````````````` example
-aaa
+[foo]: /url "title"
-bbb
+[foo]
.
-
````````````````````````````````
-Lines after the first may be indented any amount, since indented
-code blocks cannot interrupt paragraphs.
+The title may extend over multiple lines:
```````````````````````````````` example
-aaa
- bbb
- ccc
+[foo]: /url '
+title
+line1
+line2
+'
+
+[foo]
.
-
````````````````````````````````
-However, the first line may be indented at most three spaces,
-or an indented code block will be triggered:
+However, it may not contain a [blank line]:
```````````````````````````````` example
- aaa
-bbb
+[foo]: /url 'title
+
+with blank line'
+
+[foo]
.
-
aaa
-bbb
+
[foo]: /url 'title
+
with blank line'
+
[foo]
````````````````````````````````
+The title may be omitted:
+
```````````````````````````````` example
- aaa
-bbb
+[foo]:
+/url
+
+[foo]
.
-
````````````````````````````````
-Final spaces are stripped before inline parsing, so a paragraph
-that ends with two or more spaces will not end with a [hard line
-break]:
+The link destination may not be omitted:
```````````````````````````````` example
-aaa
-bbb
+[foo]:
+
+[foo]
.
-
aaa
-bbb
+
[foo]:
+
[foo]
````````````````````````````````
+ However, an empty link destination may be specified using
+ angle brackets:
-## Blank lines
+```````````````````````````````` example
+[foo]: <>
-[Blank lines] between block-level elements are ignored,
-except for the role they play in determining whether a [list]
-is [tight] or [loose].
+[foo]
+.
+
+````````````````````````````````
-Blank lines at the beginning and end of the document are also ignored.
+The title must be separated from the link destination by
+spaces or tabs:
```````````````````````````````` example
-
-
-aaa
-
-
-# aaa
+[foo]: (baz)
-
+[foo]
.
-
aaa
-
aaa
+
[foo]: (baz)
+
[foo]
````````````````````````````````
+Both title and destination can contain backslash escapes
+and literal backslashes:
-# Container blocks
+```````````````````````````````` example
+[foo]: /url\bar\*baz "foo\"bar\baz"
-A [container block](#container-blocks) is a block that has other
-blocks as its contents. There are two basic kinds of container blocks:
-[block quotes] and [list items].
-[Lists] are meta-containers for [list items].
+[foo]
+.
+
+````````````````````````````````
-We define the syntax for container blocks recursively. The general
-form of the definition is:
-> If X is a sequence of blocks, then the result of
-> transforming X in such-and-such a way is a container of type Y
-> with these blocks as its content.
+A link can come before its corresponding definition:
-So, we explain what counts as a block quote or list item by explaining
-how these can be *generated* from their contents. This should suffice
-to define the syntax, although it does not give a recipe for *parsing*
-these constructions. (A recipe is provided below in the section entitled
-[A parsing strategy](#appendix-a-parsing-strategy).)
+```````````````````````````````` example
+[foo]
-## Block quotes
+[foo]: url
+.
+
+````````````````````````````````
-A [block quote marker](@)
-consists of 0-3 spaces of initial indent, plus (a) the character `>` together
-with a following space, or (b) a single character `>` not followed by a space.
-The following rules define [block quotes]:
+If there are several matching definitions, the first one takes
+precedence:
-1. **Basic case.** If a string of lines *Ls* constitute a sequence
- of blocks *Bs*, then the result of prepending a [block quote
- marker] to the beginning of each line in *Ls*
- is a [block quote](#block-quotes) containing *Bs*.
+```````````````````````````````` example
+[foo]
-2. **Laziness.** If a string of lines *Ls* constitute a [block
- quote](#block-quotes) with contents *Bs*, then the result of deleting
- the initial [block quote marker] from one or
- more lines in which the next [non-whitespace character] after the [block
- quote marker] is [paragraph continuation
- text] is a block quote with *Bs* as its content.
- [Paragraph continuation text](@) is text
- that will be parsed as part of the content of a paragraph, but does
- not occur at the beginning of the paragraph.
+[foo]: first
+[foo]: second
+.
+
+````````````````````````````````
-3. **Consecutiveness.** A document cannot contain two [block
- quotes] in a row unless there is a [blank line] between them.
-Nothing else counts as a [block quote](#block-quotes).
+As noted in the section on [Links], matching of labels is
+case-insensitive (see [matches]).
+
+```````````````````````````````` example
+[FOO]: /url
+
+[Foo]
+.
+
````````````````````````````````
-The spaces after the `>` characters can be omitted:
+Whether something is a [link reference definition] is
+independent of whether the link reference it defines is
+used in the document. Thus, for example, the following
+document contains just a link reference definition, and
+no visible content:
```````````````````````````````` example
-># Foo
->bar
-> baz
+[foo]: /url
.
-
-
Foo
-
bar
-baz
-
````````````````````````````````
-The `>` characters can be indented 1-3 spaces:
+Here is another one:
```````````````````````````````` example
- > # Foo
- > bar
- > baz
-.
-
-
Foo
-
bar
-baz
-
+[
+foo
+]: /url
+bar
+.
+
bar
````````````````````````````````
-Four spaces gives us a code block:
+This is not a link reference definition, because there are
+characters other than spaces or tabs after the title:
```````````````````````````````` example
- > # Foo
- > bar
- > baz
+[foo]: /url "title" ok
.
-
> # Foo
-> bar
-> baz
-
+
[foo]: /url "title" ok
````````````````````````````````
-The Laziness clause allows us to omit the `>` before
-[paragraph continuation text]:
+This is a link reference definition, but it has no title:
```````````````````````````````` example
-> # Foo
-> bar
-baz
+[foo]: /url
+"title" ok
.
-
-
Foo
-
bar
-baz
-
+
"title" ok
````````````````````````````````
-A block quote can contain some lazy and some non-lazy
-continuation lines:
+This is not a link reference definition, because it is indented
+four spaces:
```````````````````````````````` example
-> bar
-baz
-> foo
+ [foo]: /url "title"
+
+[foo]
.
-
-
bar
-baz
-foo
-
+
[foo]: /url "title"
+
+
[foo]
````````````````````````````````
-Laziness only applies to lines that would have been continuations of
-paragraphs had they been prepended with [block quote markers].
-For example, the `> ` cannot be omitted in the second line of
+This is not a link reference definition, because it occurs inside
+a code block:
-``` markdown
-> foo
-> ---
+```````````````````````````````` example
+```
+[foo]: /url
```
-without changing the meaning:
-
-```````````````````````````````` example
-> foo
----
+[foo]
.
-
-
foo
-
-
+
[foo]: /url
+
+
[foo]
````````````````````````````````
-Similarly, if we omit the `> ` in the second line of
-
-``` markdown
-> - foo
-> - bar
-```
-
-then the block quote ends after the first line:
+A [link reference definition] cannot interrupt a paragraph.
```````````````````````````````` example
-> - foo
-- bar
+Foo
+[bar]: /baz
+
+[bar]
.
-
-
-
foo
-
-
-
-
bar
-
+
Foo
+[bar]: /baz
+
[bar]
````````````````````````````````
-For the same reason, we can't omit the `> ` in front of
-subsequent lines of an indented or fenced code block:
+However, it can directly follow other block elements, such as headings
+and thematic breaks, and it need not be followed by a blank line.
```````````````````````````````` example
-> foo
- bar
+# [Foo]
+[foo]: /url
+> bar
.
+
````````````````````````````````
-
-Note that in the following case, we have a [lazy
-continuation line]:
-
```````````````````````````````` example
-> foo
- - bar
+[foo]: /url
+===
+[foo]
.
-
````````````````````````````````
-To see why, note that in
+Several [link reference definitions]
+can occur one after another, without intervening blank lines.
-```markdown
-> foo
-> - bar
-```
+```````````````````````````````` example
+[foo]: /foo-url "foo"
+[bar]: /bar-url
+ "bar"
+[baz]: /baz-url
-the `- bar` is indented too far to start a list, and can't
-be an indented code block because indented code blocks cannot
-interrupt paragraphs, so it is [paragraph continuation text].
+[foo],
+[bar],
+[baz]
+.
+
+````````````````````````````````
-A block quote can be empty:
+
+[Link reference definitions] can occur
+inside block containers, like lists and block quotations. They
+affect the entire document, not just the container in which they
+are defined:
```````````````````````````````` example
->
+[foo]
+
+> [foo]: /url
.
+
````````````````````````````````
+## Paragraphs
+
+A sequence of non-blank lines that cannot be interpreted as other
+kinds of blocks forms a [paragraph](@).
+The contents of the paragraph are the result of parsing the
+paragraph's raw content as inlines. The paragraph's raw content
+is formed by concatenating the lines and removing initial and final
+spaces or tabs.
+
+A simple example with two paragraphs:
+
```````````````````````````````` example
->
->
->
+aaa
+
+bbb
.
-
-
+
aaa
+
bbb
````````````````````````````````
-A block quote can have initial or final blank lines:
+Paragraphs can contain multiple lines, but no blank lines:
```````````````````````````````` example
->
-> foo
->
+aaa
+bbb
+
+ccc
+ddd
.
-
-
foo
-
+
aaa
+bbb
+
ccc
+ddd
````````````````````````````````
-A blank line always separates block quotes:
+Multiple blank lines between paragraphs have no effect:
```````````````````````````````` example
-> foo
+aaa
-> bar
+
+bbb
.
-
-
foo
-
-
-
bar
-
+
aaa
+
bbb
````````````````````````````````
-(Most current Markdown implementations, including John Gruber's
-original `Markdown.pl`, will parse this example as a single block quote
-with two paragraphs. But it seems better to allow the author to decide
-whether two block quotes or one are wanted.)
-
-Consecutiveness means that if we put these block quotes together,
-we get a single block quote:
+Leading spaces or tabs are skipped:
```````````````````````````````` example
-> foo
-> bar
+ aaa
+ bbb
.
-
-
foo
-bar
-
+
aaa
+bbb
````````````````````````````````
-To get a block quote with two paragraphs, use:
+Lines after the first may be indented any amount, since indented
+code blocks cannot interrupt paragraphs.
```````````````````````````````` example
-> foo
->
-> bar
+aaa
+ bbb
+ ccc
.
-
-
foo
-
bar
-
+
aaa
+bbb
+ccc
````````````````````````````````
-Block quotes can interrupt paragraphs:
+However, the first line may be preceded by up to three spaces of indentation.
+Four spaces of indentation is too many:
```````````````````````````````` example
-foo
+ aaa
+bbb
+.
+
aaa
+bbb
+````````````````````````````````
+
+
+```````````````````````````````` example
+ aaa
+bbb
+.
+
aaa
+
+
bbb
+````````````````````````````````
+
+
+Final spaces or tabs are stripped before inline parsing, so a paragraph
+that ends with two or more spaces will not end with a [hard line
+break]:
+
+```````````````````````````````` example
+aaa
+bbb
+.
+
aaa
+bbb
+````````````````````````````````
+
+
+## Blank lines
+
+[Blank lines] between block-level elements are ignored,
+except for the role they play in determining whether a [list]
+is [tight] or [loose].
+
+Blank lines at the beginning and end of the document are also ignored.
+
+```````````````````````````````` example
+
+
+aaa
+
+
+# aaa
+
+
+.
+
aaa
+
aaa
+````````````````````````````````
+
+
+
+# Container blocks
+
+A [container block](#container-blocks) is a block that has other
+blocks as its contents. There are two basic kinds of container blocks:
+[block quotes] and [list items].
+[Lists] are meta-containers for [list items].
+
+We define the syntax for container blocks recursively. The general
+form of the definition is:
+
+> If X is a sequence of blocks, then the result of
+> transforming X in such-and-such a way is a container of type Y
+> with these blocks as its content.
+
+So, we explain what counts as a block quote or list item by explaining
+how these can be *generated* from their contents. This should suffice
+to define the syntax, although it does not give a recipe for *parsing*
+these constructions. (A recipe is provided below in the section entitled
+[A parsing strategy](#appendix-a-parsing-strategy).)
+
+## Block quotes
+
+A [block quote marker](@),
+optionally preceded by up to three spaces of indentation,
+consists of (a) the character `>` together with a following space of
+indentation, or (b) a single character `>` not followed by a space of
+indentation.
+
+The following rules define [block quotes]:
+
+1. **Basic case.** If a string of lines *Ls* constitute a sequence
+ of blocks *Bs*, then the result of prepending a [block quote
+ marker] to the beginning of each line in *Ls*
+ is a [block quote](#block-quotes) containing *Bs*.
+
+2. **Laziness.** If a string of lines *Ls* constitute a [block
+ quote](#block-quotes) with contents *Bs*, then the result of deleting
+ the initial [block quote marker] from one or
+ more lines in which the next character other than a space or tab after the
+ [block quote marker] is [paragraph continuation
+ text] is a block quote with *Bs* as its content.
+ [Paragraph continuation text](@) is text
+ that will be parsed as part of the content of a paragraph, but does
+ not occur at the beginning of the paragraph.
+
+3. **Consecutiveness.** A document cannot contain two [block
+ quotes] in a row unless there is a [blank line] between them.
+
+Nothing else counts as a [block quote](#block-quotes).
+
+Here is a simple example:
+
+```````````````````````````````` example
+> # Foo
> bar
+> baz
.
-
foo
-
bar
+
Foo
+
bar
+baz
````````````````````````````````
-In general, blank lines are not needed before or after block
-quotes:
+The space or tab after the `>` characters can be omitted:
```````````````````````````````` example
-> aaa
-***
-> bbb
+># Foo
+>bar
+> baz
.
-
aaa
-
-
-
-
bbb
+
Foo
+
bar
+baz
````````````````````````````````
-However, because of laziness, a blank line is needed between
-a block quote and a following paragraph:
+The `>` characters can be preceded by up to three spaces of indentation:
```````````````````````````````` example
-> bar
-baz
+ > # Foo
+ > bar
+ > baz
.
+
Foo
bar
baz
````````````````````````````````
+Four spaces of indentation is too many:
+
```````````````````````````````` example
-> bar
+ > # Foo
+ > bar
+ > baz
+.
+
> # Foo
+> bar
+> baz
+
+````````````````````````````````
+
+
+The Laziness clause allows us to omit the `>` before
+[paragraph continuation text]:
+```````````````````````````````` example
+> # Foo
+> bar
baz
.
-
bar
+
Foo
+
bar
+baz
-
baz
````````````````````````````````
+A block quote can contain some lazy and some non-lazy
+continuation lines:
+
```````````````````````````````` example
> bar
->
baz
+> foo
.
-
bar
+
bar
+baz
+foo
-
baz
````````````````````````````````
-It is a consequence of the Laziness rule that any number
-of initial `>`s may be omitted on a continuation line of a
-nested block quote:
+Laziness only applies to lines that would have been continuations of
+paragraphs had they been prepended with [block quote markers].
+For example, the `> ` cannot be omitted in the second line of
+
+``` markdown
+> foo
+> ---
+```
+
+without changing the meaning:
```````````````````````````````` example
-> > > foo
-bar
+> foo
+---
.
-
-
-
foo
-bar
-
-
+
foo
+
````````````````````````````````
+Similarly, if we omit the `> ` in the second line of
+
+``` markdown
+> - foo
+> - bar
+```
+
+then the block quote ends after the first line:
+
```````````````````````````````` example
->>> foo
-> bar
->>baz
+> - foo
+- bar
.
-
-
-
foo
-bar
-baz
-
-
+
+
foo
+
+
+
bar
+
````````````````````````````````
-When including an indented code block in a block quote,
-remember that the [block quote marker] includes
-both the `>` and a following space. So *five spaces* are needed after
-the `>`:
+For the same reason, we can't omit the `> ` in front of
+subsequent lines of an indented or fenced code block:
```````````````````````````````` example
-> code
-
-> not code
+> foo
+ bar
.
-
code
+
foo
+
bar
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+> ```
+foo
+```
+.
-
not code
+
+
foo
+
````````````````````````````````
+Note that in the following case, we have a [lazy
+continuation line]:
-## List items
+```````````````````````````````` example
+> foo
+ - bar
+.
+
+
foo
+- bar
+
+````````````````````````````````
+
+
+To see why, note that in
+
+```markdown
+> foo
+> - bar
+```
+
+the `- bar` is indented too far to start a list, and can't
+be an indented code block because indented code blocks cannot
+interrupt paragraphs, so it is [paragraph continuation text].
+
+A block quote can be empty:
+
+```````````````````````````````` example
+>
+.
+
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+>
+>
+>
+.
+
+
+````````````````````````````````
+
+
+A block quote can have initial or final blank lines:
+
+```````````````````````````````` example
+>
+> foo
+>
+.
+
+
foo
+
+````````````````````````````````
+
+
+A blank line always separates block quotes:
+
+```````````````````````````````` example
+> foo
+
+> bar
+.
+
+
foo
+
+
+
bar
+
+````````````````````````````````
+
+
+(Most current Markdown implementations, including John Gruber's
+original `Markdown.pl`, will parse this example as a single block quote
+with two paragraphs. But it seems better to allow the author to decide
+whether two block quotes or one are wanted.)
+
+Consecutiveness means that if we put these block quotes together,
+we get a single block quote:
+
+```````````````````````````````` example
+> foo
+> bar
+.
+
+
foo
+bar
+
+````````````````````````````````
+
+
+To get a block quote with two paragraphs, use:
+
+```````````````````````````````` example
+> foo
+>
+> bar
+.
+
+
foo
+
bar
+
+````````````````````````````````
+
+
+Block quotes can interrupt paragraphs:
+
+```````````````````````````````` example
+foo
+> bar
+.
+
foo
+
+
bar
+
+````````````````````````````````
+
+
+In general, blank lines are not needed before or after block
+quotes:
+
+```````````````````````````````` example
+> aaa
+***
+> bbb
+.
+
+
aaa
+
+
+
+
bbb
+
+````````````````````````````````
+
+
+However, because of laziness, a blank line is needed between
+a block quote and a following paragraph:
+
+```````````````````````````````` example
+> bar
+baz
+.
+
+
bar
+baz
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+> bar
+
+baz
+.
+
+
bar
+
+
baz
+````````````````````````````````
+
+
+```````````````````````````````` example
+> bar
+>
+baz
+.
+
+
bar
+
+
baz
+````````````````````````````````
+
+
+It is a consequence of the Laziness rule that any number
+of initial `>`s may be omitted on a continuation line of a
+nested block quote:
+
+```````````````````````````````` example
+> > > foo
+bar
+.
+
+
+
+
foo
+bar
+
+
+
+````````````````````````````````
+
+
+```````````````````````````````` example
+>>> foo
+> bar
+>>baz
+.
+
+
+
+
foo
+bar
+baz
+
+
+
+````````````````````````````````
+
+
+When including an indented code block in a block quote,
+remember that the [block quote marker] includes
+both the `>` and a following space of indentation. So *five spaces* are needed
+after the `>`:
+
+```````````````````````````````` example
+> code
+
+> not code
+.
+
+
code
+
+
+
+
not code
+
+````````````````````````````````
+
+
+
+## List items
A [list marker](@) is a
[bullet list marker] or an [ordered list marker].
@@ -3755,10 +4111,10 @@ in some browsers.)
The following rules define [list items]:
1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of
- blocks *Bs* starting with a [non-whitespace character], and *M* is a
- list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result
- of prepending *M* and the following spaces to the first line of
- *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
+ blocks *Bs* starting with a character other than a space or tab, and *M* is
+ a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation,
+ then the result of prepending *M* and the following spaces to the first line
+ of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
list item with *Bs* as its contents. The type of the list item
(bullet or ordered) is determined by the type of its list marker.
If the list item is ordered, then it is also assigned a start
@@ -3823,8 +4179,8 @@ with two lines.
The most important thing to notice is that the position of
the text after the list marker determines how much indentation
is needed in subsequent blocks in the list item. If the list
-marker takes up two spaces, and there are three spaces between
-the list marker and the next [non-whitespace character], then blocks
+marker takes up two spaces of indentation, and there are three spaces between
+the list marker and the next character other than a space or tab, then blocks
must be indented five spaces in order to fall under the list
item.
@@ -3885,10 +4241,10 @@ put under the list item:
It is tempting to think of this in terms of columns: the continuation
-blocks must be indented at least to the column of the first
-[non-whitespace character] after the list marker. However, that is not quite right.
-The spaces after the list marker determine how much relative indentation
-is needed. Which column this indentation reaches will depend on
+blocks must be indented at least to the column of the first character other than
+a space or tab after the list marker. However, that is not quite right.
+The spaces of indentation after the list marker determine how much relative
+indentation is needed. Which column this indentation reaches will depend on
how the list item is embedded in other constructions, as shown by
this example:
@@ -3935,7 +4291,7 @@ far enough past the blockquote marker:
````````````````````````````````
-Note that at least one space is needed between the list marker and
+Note that at least one space or tab is needed between the list marker and
any following content, so these are not list items:
```````````````````````````````` example
@@ -4067,16 +4423,16 @@ A start number may not be negative:
2. **Item starting with indented code.** If a sequence of lines *Ls*
constitute a sequence of blocks *Bs* starting with an indented code
block, and *M* is a list marker of width *W* followed by
- one space, then the result of prepending *M* and the following
- space to the first line of *Ls*, and indenting subsequent lines of
- *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
+ one space of indentation, then the result of prepending *M* and the
+ following space to the first line of *Ls*, and indenting subsequent lines
+ of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
If a line is empty, then it need not be indented. The type of the
list item (bullet or ordered) is determined by the type of its list
marker. If the list item is ordered, then it is also assigned a
start number, based on the ordered list marker.
-An indented code block will have to be indented four spaces beyond
-the edge of the region where text will be included in the list item.
+An indented code block will have to be preceded by four spaces of indentation
+beyond the edge of the region where text will be included in the list item.
In the following case that is 6 spaces:
```````````````````````````````` example
@@ -4112,8 +4468,8 @@ And in this case it is 11 spaces:
If the *first* block in the list item is an indented code block,
-then by rule #2, the contents must be indented *one* space after the
-list marker:
+then by rule #2, the contents must be preceded by *one* space of indentation
+after the list marker:
```````````````````````````````` example
indented code
@@ -4149,7 +4505,7 @@ paragraph
````````````````````````````````
-Note that an additional space indent is interpreted as space
+Note that an additional space of indentation is interpreted as space
inside the code block:
```````````````````````````````` example
@@ -4173,10 +4529,10 @@ inside the code block:
Note that rules #1 and #2 only apply to two cases: (a) cases
in which the lines to be included in a list item begin with a
-[non-whitespace character], and (b) cases in which
+character other than a space or tab, and (b) cases in which
they begin with an indented code
block. In a case like the following, where the first block begins with
-a three-space indent, the rules do not allow us to form a list item by
+three spaces of indentation, the rules do not allow us to form a list item by
indenting the whole thing and prepending a list marker:
```````````````````````````````` example
@@ -4201,8 +4557,8 @@ bar
````````````````````````````````
-This is not a significant restriction, because when a block begins
-with 1-3 spaces indent, the indentation can always be removed without
+This is not a significant restriction, because when a block is preceded by up to
+three spaces of indentation, the indentation can always be removed without
a change in interpretation, allowing rule #1 to be applied. So, in
the above case:
@@ -4222,11 +4578,10 @@ the above case:
3. **Item starting with a blank line.** If a sequence of lines *Ls*
starting with a single [blank line] constitute a (possibly empty)
- sequence of blocks *Bs*, not separated from each other by more than
- one blank line, and *M* is a list marker of width *W*,
+ sequence of blocks *Bs*, and *M* is a list marker of width *W*,
then the result of prepending *M* to the first line of *Ls*, and
- indenting subsequent lines of *Ls* by *W + 1* spaces, is a list
- item with *Bs* as its contents.
+ preceding subsequent lines of *Ls* by *W + 1* spaces of indentation, is a
+ list item with *Bs* as its contents.
If a line is empty, then it need not be indented. The type of the
list item (bullet or ordered) is determined by the type of its list
marker. If the list item is ordered, then it is also assigned a
@@ -4301,7 +4656,7 @@ Here is an empty bullet list item:
````````````````````````````````
-It does not matter whether there are spaces following the [list marker]:
+It does not matter whether there are spaces or tabs following the [list marker]:
```````````````````````````````` example
- foo
@@ -4358,9 +4713,9 @@ foo
4. **Indentation.** If a sequence of lines *Ls* constitutes a list item
- according to rule #1, #2, or #3, then the result of indenting each line
- of *Ls* by 1-3 spaces (the same for each line) also constitutes a
- list item with the same contents and attributes. If a line is
+ according to rule #1, #2, or #3, then the result of preceding each line
+ of *Ls* by up to three spaces of indentation (the same for each line) also
+ constitutes a list item with the same contents and attributes. If a line is
empty, then it need not be indented.
Indented one space:
@@ -4459,7 +4814,7 @@ Four spaces indent gives a code block:
5. **Laziness.** If a string of lines *Ls* constitute a [list
item](#list-items) with contents *Bs*, then the result of deleting
some or all of the indentation from one or more lines in which the
- next [non-whitespace character] after the indentation is
+ next character other than a space or tab after the indentation is
[paragraph continuation text] is a
list item with the same contents and attributes. The unindented
lines are called
@@ -4544,7 +4899,7 @@ continued here.
The rules for sublists follow from the general rules
[above][List items]. A sublist must be indented the same number
-of spaces a paragraph would need to be in order to be included
+of spaces of indentation a paragraph would need to be in order to be included
in the list item.
So, in this case we need two spaces indent:
@@ -4777,8 +5132,8 @@ The choice of four spaces is arbitrary. It can be learned, but it is
not likely to be guessed, and it trips up beginners regularly.
Would it help to adopt a two-space rule? The problem is that such
-a rule, together with the rule allowing 1--3 spaces indentation of the
-initial list marker, allows text that is indented *less than* the
+a rule, together with the rule allowing up to three spaces of indentation for
+the initial list marker, allows text that is indented *less than* the
original list marker to be included in the list item. For example,
`Markdown.pl` parses
@@ -4994,11 +5349,11 @@ by itself should be a paragraph followed by a nested sublist.
Since it is well established Markdown practice to allow lists to
interrupt paragraphs inside list items, the [principle of
uniformity] requires us to allow this outside list items as
-well. ([reStructuredText](http://docutils.sourceforge.net/rst.html)
+well. ([reStructuredText](https://docutils.sourceforge.net/rst.html)
takes a different approach, requiring blank lines before lists
even inside other list items.)
-In order to solve of unwanted lists in paragraphs with
+In order to solve the problem of unwanted lists in paragraphs with
hard-wrapped numerals, we allow only lists starting with `1` to
interrupt paragraphs. Thus,
@@ -5170,8 +5525,8 @@ item:
````````````````````````````````
-Note, however, that list items may not be indented more than
-three spaces. Here `- e` is treated as a paragraph continuation
+Note, however, that list items may not be preceded by more than
+three spaces of indentation. Here `- e` is treated as a paragraph continuation
line, because it is indented more than three spaces:
```````````````````````````````` example
@@ -5257,7 +5612,7 @@ So is this, with a empty second item:
````````````````````````````````
-These are loose lists, even though there is no space between the items,
+These are loose lists, even though there are no blank lines between the items,
because one of the items directly contains two block-level elements
with a blank line between them:
@@ -5278,572 +5633,233 @@ with a blank line between them:
d
-
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-- a
-- b
-
- [ref]: /url
-- d
-.
-
-
-
a
-
-
-
b
-
-
-
d
-
-
-````````````````````````````````
-
-
-This is a tight list, because the blank lines are in a code block:
-
-```````````````````````````````` example
-- a
-- ```
- b
-
-
- ```
-- c
-.
-
-
a
-
-
b
-
-
-
-
-
c
-
-````````````````````````````````
-
-
-This is a tight list, because the blank line is between two
-paragraphs of a sublist. So the sublist is loose while
-the outer list is tight:
-
-```````````````````````````````` example
-- a
- - b
-
- c
-- d
-.
-
-
a
-
-
-
b
-
c
-
-
-
-
d
-
-````````````````````````````````
-
-
-This is a tight list, because the blank line is inside the
-block quote:
-
-```````````````````````````````` example
-* a
- > b
- >
-* c
-.
-
-
a
-
-
b
-
-
-
c
-
-````````````````````````````````
-
-
-This list is tight, because the consecutive block elements
-are not separated by blank lines:
-
-```````````````````````````````` example
-- a
- > b
- ```
- c
- ```
-- d
-.
-
-
a
-
-
b
-
-
c
-
-
-
d
-
-````````````````````````````````
-
-
-A single-paragraph list is tight:
-
-```````````````````````````````` example
-- a
-.
-
-
a
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-- a
- - b
-.
-
-
a
-
-
b
-
-
-
-````````````````````````````````
-
-
-This list is loose, because of the blank line between the
-two block elements in the list item:
-
-```````````````````````````````` example
-1. ```
- foo
- ```
-
- bar
-.
-
-
-
foo
-
-
bar
-
-
-````````````````````````````````
-
-
-Here the outer list is loose, the inner list tight:
-
-```````````````````````````````` example
-* foo
- * bar
-
- baz
-.
-
-
-
foo
-
-
bar
-
-
baz
-
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-- a
- - b
- - c
-
-- d
- - e
- - f
-.
-
-
-
a
-
-
b
-
c
-
-
-
-
d
-
-
e
-
f
-
-
-
-````````````````````````````````
-
-
-# Inlines
-
-Inlines are parsed sequentially from the beginning of the character
-stream to the end (left to right, in left-to-right languages).
-Thus, for example, in
-
-```````````````````````````````` example
-`hi`lo`
-.
-
hilo`
-````````````````````````````````
-
-`hi` is parsed as code, leaving the backtick at the end as a literal
-backtick.
-
-
-## Backslash escapes
-
-Any ASCII punctuation character may be backslash-escaped:
-
-```````````````````````````````` example
-\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
-.
-
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
-````````````````````````````````
-
-
-Backslashes before other characters are treated as literal
-backslashes:
-
-```````````````````````````````` example
-\→\A\a\ \3\φ\«
-.
-
\→\A\a\ \3\φ\«
-````````````````````````````````
-
-
-Escaped characters are treated as regular characters and do
-not have their usual Markdown meanings:
-
-```````````````````````````````` example
-\*not emphasized*
-\ not a tag
-\[not a link](/foo)
-\`not code`
-1\. not a list
-\* not a list
-\# not a heading
-\[foo]: /url "not a reference"
-\ö not a character entity
-.
-
*not emphasized*
-<br/> not a tag
-[not a link](/foo)
-`not code`
-1. not a list
-* not a list
-# not a heading
-[foo]: /url "not a reference"
-ö not a character entity
-````````````````````````````````
-
-
-If a backslash is itself escaped, the following character is not:
-
-```````````````````````````````` example
-\\*emphasis*
-.
-
\emphasis
-````````````````````````````````
-
-
-A backslash at the end of the line is a [hard line break]:
-
-```````````````````````````````` example
-foo\
-bar
-.
-
foo
-bar
-````````````````````````````````
-
-
-Backslash escapes do not work in code blocks, code spans, autolinks, or
-raw HTML:
-
-```````````````````````````````` example
-`` \[\` ``
-.
-
\[\`
-````````````````````````````````
-
-
-```````````````````````````````` example
- \[\]
-.
-
\[\]
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-~~~
-\[\]
-~~~
-.
-
\[\]
-
-````````````````````````````````
-
-
-```````````````````````````````` example
-
-.
-
-````````````````````````````````
-
-
-[Decimal numeric character
-references](@)
-consist of `&#` + a string of 1--7 arabic digits + `;`. A
-numeric character reference is parsed as the corresponding
-Unicode character. Invalid Unicode code points will be replaced by
-the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons,
-the code point `U+0000` will also be replaced by `U+FFFD`.
-
-```````````````````````````````` example
-# Ӓ Ϡ
-.
-
# Ӓ Ϡ �
-````````````````````````````````
-
-
-[Hexadecimal numeric character
-references](@) consist of `&#` +
-either `X` or `x` + a string of 1-6 hexadecimal digits + `;`.
-They too are parsed as the corresponding Unicode character (this
-time specified with a hexadecimal numeral instead of decimal).
-
-```````````````````````````````` example
-" ആ ಫ
-.
-
" ആ ಫ
+
+
````````````````````````````````
-Here are some nonentities:
-
```````````````````````````````` example
-  &x;
-
-abcdef0;
-&ThisIsNotDefined; &hi?;
+- a
+- b
+
+ [ref]: /url
+- d
.
-
````````````````````````````````
-Strings that are not on the list of HTML5 named entities are not
-recognized as entity references either:
+This is a tight list, because the blank line is between two
+paragraphs of a sublist. So the sublist is loose while
+the outer list is tight:
```````````````````````````````` example
-&MadeUpEntity;
+- a
+ - b
+
+ c
+- d
.
-
&MadeUpEntity;
+
+
a
+
+
+
b
+
c
+
+
+
+
d
+
````````````````````````````````
-Entity and numeric character references are recognized in any
-context besides code spans or code blocks, including
-URLs, [link titles], and [fenced code block][] [info strings]:
+This is a tight list, because the blank line is inside the
+block quote:
```````````````````````````````` example
-
+* a
+ > b
+ >
+* c
.
-
+
````````````````````````````````
-```````````````````````````````` example
-[foo]
+A single-paragraph list is tight:
-[foo]: /föö "föö"
+```````````````````````````````` example
+- a
.
-
````````````````````````````````
```````````````````````````````` example
-``` föö
-foo
-```
+- a
+ - b
.
-
foo
-
+
+
a
+
+
b
+
+
+
````````````````````````````````
-Entity and numeric character references are treated as literal
-text in code spans and code blocks:
+This list is loose, because of the blank line between the
+two block elements in the list item:
```````````````````````````````` example
-`föö`
-.
-
föö
-````````````````````````````````
-
+1. ```
+ foo
+ ```
-```````````````````````````````` example
- föfö
+ bar
.
-
föfö
+
+
+
foo
+
bar
+
+
````````````````````````````````
-Entity and numeric character references cannot be used
-in place of symbols indicating structure in CommonMark
-documents.
+Here the outer list is loose, the inner list tight:
```````````````````````````````` example
-*foo*
-*foo*
+* foo
+ * bar
+
+ baz
.
-
*foo*
-foo
+
+
+
foo
+
+
bar
+
+
baz
+
+
````````````````````````````````
+
```````````````````````````````` example
-* foo
+- a
+ - b
+ - c
-* foo
+- d
+ - e
+ - f
.
-
* foo
-
foo
+
+
a
+
+
b
+
c
+
+
+
+
d
+
+
e
+
f
+
+
````````````````````````````````
-```````````````````````````````` example
-foo
bar
-.
-
foo
-bar
-````````````````````````````````
+# Inlines
+
+Inlines are parsed sequentially from the beginning of the character
+stream to the end (left to right, in left-to-right languages).
+Thus, for example, in
```````````````````````````````` example
- foo
+`hi`lo`
.
-
→foo
+
hilo`
````````````````````````````````
+`hi` is parsed as code, leaving the backtick at the end as a literal
+backtick.
-```````````````````````````````` example
-[a](url "tit")
-.
-
[a](url "tit")
-````````````````````````````````
## Code spans
@@ -5854,7 +5870,7 @@ preceded nor followed by a backtick.
A [code span](@) begins with a backtick string and ends with
a backtick string of equal length. The contents of the code span are
-the characters between the two backtick strings, normalized in the
+the characters between these two backtick strings, normalized in the
following ways:
- First, [line endings] are converted to [spaces].
@@ -6038,18 +6054,18 @@ But this is an HTML tag:
And this is code:
```````````````````````````````` example
-`<http://foo.bar.`baz>`
+`<https://foo.bar.`baz>`
.
-<p><code>&lt;http://foo.bar.</code>baz&gt;`</p>
+<p><code>&lt;https://foo.bar.</code>baz&gt;`</p>
````````````````````````````````
But this is an autolink:
```````````````````````````````` example
-`
+`
.
-
````````````````````````````````
@@ -6082,7 +6098,7 @@ closing backtick strings to be equal in length:
## Emphasis and strong emphasis
John Gruber's original [Markdown syntax
-description](http://daringfireball.net/projects/markdown/syntax#em) says:
+description](https://daringfireball.net/projects/markdown/syntax#em) says:
> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML
@@ -6133,17 +6149,17 @@ a non-backslash-escaped `_` character.
A [left-flanking delimiter run](@) is
a [delimiter run] that is (1) not followed by [Unicode whitespace],
-and either (2a) not followed by a [punctuation character], or
-(2b) followed by a [punctuation character] and
-preceded by [Unicode whitespace] or a [punctuation character].
+and either (2a) not followed by a [Unicode punctuation character], or
+(2b) followed by a [Unicode punctuation character] and
+preceded by [Unicode whitespace] or a [Unicode punctuation character].
For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace.
A [right-flanking delimiter run](@) is
a [delimiter run] that is (1) not preceded by [Unicode whitespace],
-and either (2a) not preceded by a [punctuation character], or
-(2b) preceded by a [punctuation character] and
-followed by [Unicode whitespace] or a [punctuation character].
+and either (2a) not preceded by a [Unicode punctuation character], or
+(2b) preceded by a [Unicode punctuation character] and
+followed by [Unicode whitespace] or a [Unicode punctuation character].
For purposes of this definition, the beginning and the end of
the line count as Unicode whitespace.
@@ -6184,7 +6200,7 @@ Here are some examples of delimiter runs.
(The idea of distinguishing left-flanking and right-flanking
delimiter runs based on the character before and the character
after comes from Roopesh Chander's
-[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags).
+[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags).
vfmd uses the terminology "emphasis indicator string" instead of "delimiter
run," and its rules for distinguishing left- and right-flanking runs
are a bit more complex than the ones given here.)
@@ -6198,7 +6214,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [left-flanking delimiter run]
and either (a) not part of a [right-flanking delimiter run]
or (b) part of a [right-flanking delimiter run]
- preceded by punctuation.
+ preceded by a [Unicode punctuation character].
3. A single `*` character [can close emphasis](@)
iff it is part of a [right-flanking delimiter run].
@@ -6207,7 +6223,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [right-flanking delimiter run]
and either (a) not part of a [left-flanking delimiter run]
or (b) part of a [left-flanking delimiter run]
- followed by punctuation.
+ followed by a [Unicode punctuation character].
5. A double `**` [can open strong emphasis](@)
iff it is part of a [left-flanking delimiter run].
@@ -6216,7 +6232,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [left-flanking delimiter run]
and either (a) not part of a [right-flanking delimiter run]
or (b) part of a [right-flanking delimiter run]
- preceded by punctuation.
+ preceded by a [Unicode punctuation character].
7. A double `**` [can close strong emphasis](@)
iff it is part of a [right-flanking delimiter run].
@@ -6225,7 +6241,7 @@ The following rules define emphasis and strong emphasis:
it is part of a [right-flanking delimiter run]
and either (a) not part of a [left-flanking delimiter run]
or (b) part of a [left-flanking delimiter run]
- followed by punctuation.
+ followed by a [Unicode punctuation character].
9. Emphasis begins with a delimiter that [can open emphasis] and ends
with a delimiter that [can close emphasis], and that uses the same
@@ -6326,6 +6342,21 @@ Unicode nonbreaking spaces count as whitespace, too:
````````````````````````````````
+Unicode symbols count as punctuation, too:
+
+```````````````````````````````` example
+*$*alpha.
+
+*£*bravo.
+
+*€*charlie.
+.
+<p>*$*alpha.</p>
+<p>*£*bravo.</p>
+<p>*€*charlie.</p>
+````````````````````````````````
+
+
Intraword emphasis with `*` is permitted:
```````````````````````````````` example
@@ -6437,7 +6468,7 @@ whitespace:
````````````````````````````````
-A newline also counts as whitespace:
+A line ending also counts as whitespace:
```````````````````````````````` example
*foo bar
@@ -6602,7 +6633,7 @@ __ foo bar__
````````````````````````````````
-A newline counts as whitespace:
+A line ending counts as whitespace:
```````````````````````````````` example
__
foo bar__
@@ -6881,7 +6912,7 @@ emphasis sections in this example:
The same condition ensures that the following
cases are all strong emphasis nested inside
-emphasis, even when the interior spaces are
+emphasis, even when the interior whitespace is
omitted:
@@ -7411,16 +7442,16 @@ _a `_`_
```````````````````````````````` example
-**a
+**a
.
-
````````````````````````````````
@@ -7458,13 +7489,14 @@ following rules apply:
A [link destination](@) consists of either
- a sequence of zero or more characters between an opening `<` and a
- closing `>` that contains no line breaks or unescaped
+ closing `>` that contains no line endings or unescaped
`<` or `>` characters, or
-- a nonempty sequence of characters that does not start with
- `<`, does not include ASCII space or control characters, and
- includes parentheses only if (a) they are backslash-escaped or
- (b) they are part of a balanced pair of unescaped parentheses.
+- a nonempty sequence of characters that does not start with `<`,
+ does not include [ASCII control characters][ASCII control character]
+ or [space] characters, and includes parentheses only if (a) they are
+ backslash-escaped or (b) they are part of a balanced pair of
+ unescaped parentheses.
(Implementations may impose limits on parentheses nesting to
avoid performance issues, but at least three levels of nesting
should be supported.)
@@ -7487,10 +7519,14 @@ Although [link titles] may span multiple lines, they may not contain
a [blank line].
An [inline link](@) consists of a [link text] followed immediately
-by a left parenthesis `(`, optional [whitespace], an optional
-[link destination], an optional [link title] separated from the link
-destination by [whitespace], optional [whitespace], and a right
-parenthesis `)`. The link's text consists of the inlines contained
+by a left parenthesis `(`, an optional [link destination], an optional
+[link title], and a right parenthesis `)`.
+These four components may be separated by spaces, tabs, and up to one line
+ending.
+If both [link destination] and [link title] are present, they *must* be
+separated by spaces, tabs, and up to one line ending.
+
+The link's text consists of the inlines contained
in the [link text] (excluding the enclosing square brackets).
The link's URI consists of the link destination, excluding enclosing
`<...>` if present, with backslash-escapes in effect as described
@@ -7507,7 +7543,8 @@ Here is a simple inline link:
````````````````````````````````
-The title may be omitted:
+The title, the link text and even
+the destination may be omitted:
```````````````````````````````` example
[link](/uri)
@@ -7515,8 +7552,12 @@ The title may be omitted:
````````````````````````````````
+```````````````````````````````` example
+[](./target.md)
+.
+
+````````````````````````````````
-Both the title and the destination may be omitted:
```````````````````````````````` example
[link]()
@@ -7531,6 +7572,13 @@ Both the title and the destination may be omitted:
````````````````````````````````
+
+```````````````````````````````` example
+[]()
+.
+
+````````````````````````````````
+
The destination can only contain spaces if it is
enclosed in pointy brackets:
@@ -7546,7 +7594,7 @@ enclosed in pointy brackets:
````````````````````````````````
-The destination cannot contain line breaks,
+The destination cannot contain line endings,
even if enclosed in pointy brackets:
```````````````````````````````` example
@@ -7615,6 +7663,13 @@ balanced:
However, if you have unbalanced parentheses, you need to escape or use the
`<...>` form:
+```````````````````````````````` example
+[link](foo(and(bar))
+.
+<p>[link](foo(and(bar))</p>
+````````````````````````````````
+
+
```````````````````````````````` example
[link](foo\(and\(bar\))
.
@@ -7644,13 +7699,13 @@ A link can contain fragment identifiers and queries:
```````````````````````````````` example
[link](#fragment)
-[link](http://example.com#fragment)
+[link](https://example.com#fragment)
-[link](http://example.com?foo=3#frag)
+[link](https://example.com?foo=3#frag)
.
````````````````````````````````
@@ -7714,7 +7769,8 @@ may be used in titles:
````````````````````````````````
-Titles must be separated from the link using a [whitespace].
+Titles must be separated from the link using spaces, tabs, and up to one line
+ending.
Other [Unicode whitespace] like non-breaking space doesn't work.
```````````````````````````````` example
@@ -7757,7 +7813,8 @@ titles with no closing quotation mark, though 1.0.2b8 does not.
It seems preferable to adopt a simple, rational rule that works
the same way in inline links and link reference definitions.)
-[Whitespace] is allowed around the destination and title:
+Spaces, tabs, and up to one line ending are allowed around the destination and
+title:
```````````````````````````````` example
[link]( /uri
@@ -7892,9 +7949,9 @@ and autolinks over link grouping:
```````````````````````````````` example
-[foo
+[foo
.
-
````````````````````````````````
@@ -7908,7 +7965,8 @@ that [matches] a [link reference definition] elsewhere in the document.
A [link label](@) begins with a left bracket (`[`) and ends
with the first right bracket (`]`) that is not backslash-escaped.
-Between these brackets there must be at least one [non-whitespace character].
+Between these brackets there must be at least one character that is not a space,
+tab, or line ending.
Unescaped square bracket characters are not allowed inside the
opening and closing square brackets of [link labels]. A link
label can have at most 999 characters inside the square
@@ -7918,14 +7976,13 @@ One label [matches](@)
another just in case their normalized forms are equal. To normalize a
label, strip off the opening and closing brackets,
perform the *Unicode case fold*, strip leading and trailing
-[whitespace] and collapse consecutive internal
-[whitespace] to a single space. If there are multiple
+spaces, tabs, and line endings, and collapse consecutive internal
+spaces, tabs, and line endings to a single space. If there are multiple
matching reference link definitions, the one that comes first in the
document is used. (It is desirable in such cases to emit a warning.)
-The contents of the first link label are parsed as inlines, which are
-used as the link's text. The link's URI and title are provided by the
-matching [link reference definition].
+The link's URI and title are provided by the matching [link
+reference definition].
Here is a simple example:
@@ -8018,11 +8075,11 @@ emphasis grouping:
```````````````````````````````` example
-[foo *bar][ref]
+[foo *bar][ref]*
[ref]: /uri
.
-
````````````````````````````````
@@ -8048,11 +8105,11 @@ and autolinks over link grouping:
```````````````````````````````` example
-[foo
+[foo
[ref]: /uri
.
-
````````````````````````````````
@@ -8070,15 +8127,15 @@ Matching is case-insensitive:
Unicode case fold is used:
```````````````````````````````` example
-[Толпой][Толпой] is a Russian word.
+[ẞ]
-[ТОЛПОЙ]: /url
+[SS]: /url
.
-
````````````````````````````````
-Consecutive internal [whitespace] is treated as one space for
+Consecutive internal spaces, tabs, and line endings are treated as one space for
purposes of determining matching:
```````````````````````````````` example
@@ -8091,7 +8148,7 @@ purposes of determining matching:
````````````````````````````````
-No [whitespace] is allowed between the [link text] and the
+No spaces, tabs, or line endings are allowed between the [link text] and the
[link label]:
```````````````````````````````` example
@@ -8221,7 +8278,8 @@ Note that in this example `]` is not backslash-escaped:
````````````````````````````````
-A [link label] must contain at least one [non-whitespace character]:
+A [link label] must contain at least one character that is not a space, tab, or
+line ending:
```````````````````````````````` example
[]
@@ -8251,7 +8309,7 @@ A [collapsed reference link](@)
consists of a [link label] that [matches] a
[link reference definition] elsewhere in the
document, followed by the string `[]`.
-The contents of the first link label are parsed as inlines,
+The contents of the link label are parsed as inlines,
which are used as the link's text. The link's URI and title are
provided by the matching reference link definition. Thus,
`[foo][]` is equivalent to `[foo][foo]`.
@@ -8286,7 +8344,7 @@ The link labels are case-insensitive:
-As with full reference links, [whitespace] is not
+As with full reference links, spaces, tabs, or line endings are not
allowed between the two sets of brackets:
```````````````````````````````` example
@@ -8304,7 +8362,7 @@ A [shortcut reference link](@)
consists of a [link label] that [matches] a
[link reference definition] elsewhere in the
document and is not followed by `[]` or a link label.
-The contents of the first link label are parsed as inlines,
+The contents of the link label are parsed as inlines,
which are used as the link's text. The link's URI and title
are provided by the matching link reference definition.
Thus, `[foo]` is equivalent to `[foo][]`.
@@ -8391,7 +8449,7 @@ following closing bracket:
````````````````````````````````
-Full and compact references take precedence over shortcut
+Full and collapsed references take precedence over shortcut
references:
```````````````````````````````` example
@@ -8614,7 +8672,7 @@ The labels are case-insensitive:
````````````````````````````````
-As with reference links, [whitespace] is not allowed
+As with reference links, spaces, tabs, and line endings are not allowed
between the two sets of brackets:
```````````````````````````````` example
@@ -8707,9 +8765,9 @@ a link to the URI, with the URI as the link's label.
An [absolute URI](@),
for these purposes, consists of a [scheme] followed by a colon (`:`)
-followed by zero or more characters other than ASCII
-[whitespace] and control characters, `<`, and `>`. If
-the URI includes these characters, they must be percent-encoded
+followed by zero or more characters other than [ASCII control
+characters][ASCII control character], [space], `<`, and `>`.
+If the URI includes these characters, they must be percent-encoded
(e.g. `%20` for a space).
For purposes of this spec, a [scheme](@) is any sequence
@@ -8727,9 +8785,9 @@ Here are some valid autolinks:
```````````````````````````````` example
-
+
.
-
````````````````````````````````
@@ -8785,18 +8843,18 @@ with their syntax:
Spaces are not allowed in autolinks:
```````````````````````````````` example
-<http://foo.bar/baz bim>
+<https://foo.bar/baz bim>
.
-<p>&lt;http://foo.bar/baz bim&gt;</p>
+<p>&lt;https://foo.bar/baz bim&gt;</p>
````````````````````````````````
Backslash-escapes do not work inside autolinks:
```````````````````````````````` example
-
+
.
-
````````````````````````````````
@@ -8848,9 +8906,9 @@ These are not autolinks:
```````````````````````````````` example
-< http://foo.bar >
+< https://foo.bar >
.
-<p>&lt; http://foo.bar &gt;</p>
+<p>&lt; https://foo.bar &gt;</p>
````````````````````````````````
@@ -8869,9 +8927,9 @@ These are not autolinks:
```````````````````````````````` example
-http://example.com
+https://example.com
.
-<p>http://example.com</p>
+<p>https://example.com</p>
````````````````````````````````
@@ -8895,7 +8953,7 @@ A [tag name](@) consists of an ASCII letter
followed by zero or more ASCII letters, digits, or
hyphens (`-`).
-An [attribute](@) consists of [whitespace],
+An [attribute](@) consists of spaces, tabs, and up to one line ending,
an [attribute name], and an optional
[attribute value specification].
@@ -8905,9 +8963,9 @@ letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML
specification restricted to ASCII. HTML5 is laxer.)
An [attribute value specification](@)
-consists of optional [whitespace],
-a `=` character, optional [whitespace], and an [attribute
-value].
+consists of optional spaces, tabs, and up to one line ending,
+a `=` character, optional spaces, tabs, and up to one line ending,
+and an [attribute value].
An [attribute value](@)
consists of an [unquoted attribute value],
@@ -8915,7 +8973,7 @@ a [single-quoted attribute value], or a [double-quoted attribute value].
An [unquoted attribute value](@)
is a nonempty string of characters not
-including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``.
+including spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or `` ` ``.
A [single-quoted attribute value](@)
consists of `'`, zero or more
@@ -8926,26 +8984,24 @@ consists of `"`, zero or more
characters not including `"`, and a final `"`.
An [open tag](@) consists of a `<` character, a [tag name],
-zero or more [attributes], optional [whitespace], an optional `/`
-character, and a `>` character.
+zero or more [attributes], optional spaces, tabs, and up to one line ending,
+an optional `/` character, and a `>` character.
A [closing tag](@) consists of the string `</`, a
-[tag name], optional [whitespace], and the character `>`.
+[tag name], optional spaces, tabs, and up to one line ending, and the character
+`>`.
-An [HTML comment](@) consists of `<!--` + *text* + `-->`,
-where *text* does not start with `>` or `->`, does not end with `-`,
-and does not contain `--`. (See the
-[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).)
+An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of
+characters not including the string `-->`, and `-->` (see the
+[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)).
A [processing instruction](@)
consists of the string `<?`, a string
of characters not including the string `?>`, and the string
`?>`.
-A [declaration](@) consists of the
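-string `<!`, a name consisting of one or more uppercase ASCII letters,
-[whitespace], a string of characters not including the
-character `>`, and the character `>`.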
+A [declaration](@) consists of the string `<!`, an ASCII letter, zero or more
+characters not including the character `>`, and the character `>`.
A [CDATA section](@) consists of
the string `<![CDATA[`, a string of characters not including the string
`]]>`, and the string `]]>`.
@@ -9046,7 +9102,7 @@ bim!bop />
````````````````````````````````
-Missing [whitespace]:
+Missing whitespace:
```````````````````````````````` example
@@ -9076,30 +9132,20 @@ Illegal attributes in closing tag:
Comments:
```````````````````````````````` example
-foo
+foo
.
-
+ * Line terminators can be either a line feed {@code "\n"}, carriage return {@code "\r"}, or a carriage return followed
+ * by a line feed {@code "\r\n"}. Call {@link #getLineTerminator()} after {@link #readLine()} to obtain the
+ * corresponding line terminator. If a stream has a line at the end without a terminator, {@link #getLineTerminator()}
+ * returns {@code null}.
+ */
+public class LineReader implements Closeable {
+
+ // Same as java.io.BufferedReader
+ static final int CHAR_BUFFER_SIZE = 8192;
+ // Initial capacity of the StringBuilder used when a line does not fit in a single buffer fill
+ static final int EXPECTED_LINE_LENGTH = 80;
+
+ // Underlying character source; set to null by close()
+ private Reader reader;
+ // Read buffer of CHAR_BUFFER_SIZE chars; set to null by close()
+ private char[] cbuf;
+
+ // Index in cbuf of the next character that has not been consumed yet
+ private int position = 0;
+ // Number of valid characters in cbuf as reported by the last successful fill()
+ private int limit = 0;
+
+ // Terminator of the line most recently returned by readLine(), or null if it had none
+ private String lineTerminator = null;
+
+ /**
+ * Create a line reader that reads from the given reader through an internal buffer of
+ * {@link #CHAR_BUFFER_SIZE} characters.
+ *
+ * @param reader the reader to read characters from
+ */
+ public LineReader(Reader reader) {
+ this.reader = reader;
+ this.cbuf = new char[CHAR_BUFFER_SIZE];
+ }
+
+ /**
+ * Read a line of text.
+ *
+ * @return the line, or {@code null} when the end of the stream has been reached and no more lines can be read
+ * @throws IOException if reading from the underlying reader fails
+ */
+ public String readLine() throws IOException {
+ // Holds the part of the current line copied out of earlier buffer contents; stays null as long as the
+ // whole line is found within the current buffer.
+ StringBuilder sb = null;
+ // True when the last character of the previous buffer contents was a CR, so we still need to look at
+ // the next character to tell CR from CR LF.
+ boolean cr = false;
+
+ while (true) {
+ if (position >= limit) {
+ fill();
+ }
+
+ if (cr) {
+ // We saw a CR before, check if we have CR LF or just CR.
+ if (position < limit && cbuf[position] == '\n') {
+ position++;
+ return line(sb.toString(), "\r\n");
+ } else {
+ return line(sb.toString(), "\r");
+ }
+ }
+
+ if (position >= limit) {
+ // End of stream, return either the last line without terminator or null for end.
+ return line(sb != null ? sb.toString() : null, null);
+ }
+
+ int start = position;
+ int i = position;
+ for (; i < limit; i++) {
+ char c = cbuf[i];
+ if (c == '\n') {
+ position = i + 1;
+ return line(finish(sb, start, i), "\n");
+ } else if (c == '\r') {
+ if (i + 1 < limit) {
+ // We know what the next character is, so we can check now whether we have
+ // a CR LF or just a CR and return.
+ if (cbuf[i + 1] == '\n') {
+ position = i + 2;
+ return line(finish(sb, start, i), "\r\n");
+ } else {
+ position = i + 1;
+ return line(finish(sb, start, i), "\r");
+ }
+ } else {
+ // We don't know what the next character is yet, check on next iteration.
+ cr = true;
+ position = i + 1;
+ break;
+ }
+ }
+ }
+
+ // The scan reached the end of the buffer without a terminator: mark everything as consumed.
+ // (In the trailing CR case position is already i + 1, so it is left alone.)
+ if (position < i) {
+ position = i;
+ }
+
+ // Haven't found a finished line yet, copy the data from the buffer so that we can fill
+ // the buffer again.
+ if (sb == null) {
+ sb = new StringBuilder(EXPECTED_LINE_LENGTH);
+ }
+ sb.append(cbuf, start, i - start);
+ }
+ }
+
+ /**
+ * Return the line terminator of the last read line from {@link #readLine()}.
+ *
+ * @return {@code "\n"}, {@code "\r"}, {@code "\r\n"}, or {@code null}
+ */
+ public String getLineTerminator() {
+ return lineTerminator;
+ }
+
+ /**
+ * Close the underlying reader and drop the references to it and to the buffer. Calling this again after the
+ * reader has been closed does nothing.
+ *
+ * @throws IOException if closing the underlying reader fails
+ */
+ @Override
+ public void close() throws IOException {
+ if (reader == null) {
+ return;
+ }
+ try {
+ reader.close();
+ } finally {
+ reader = null;
+ cbuf = null;
+ }
+ }
+
+ /**
+ * Refill the buffer from the reader, retrying while the reader returns 0 characters. If characters were read,
+ * the buffer is reset to start at index 0. Otherwise {@code position} and {@code limit} are left unchanged,
+ * so the caller still sees {@code position >= limit}.
+ */
+ private void fill() throws IOException {
+ int read;
+ do {
+ read = reader.read(cbuf, 0, cbuf.length);
+ } while (read == 0);
+ if (read > 0) {
+ limit = read;
+ position = 0;
+ }
+ }
+
+ /**
+ * Remember the terminator for {@link #getLineTerminator()} and return the line unchanged.
+ */
+ private String line(String line, String lineTerminator) {
+ this.lineTerminator = lineTerminator;
+ return line;
+ }
+
+ /**
+ * Build the finished line from the buffer characters in {@code [start, end)}, appended to whatever was
+ * already accumulated in {@code sb} (if anything).
+ */
+ private String finish(StringBuilder sb, int start, int end) {
+ int len = end - start;
+ if (sb == null) {
+ return new String(cbuf, start, len);
+ } else {
+ return sb.append(cbuf, start, len).toString();
+ }
+ }
+}
diff --git a/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java b/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java
index f25cd59e5..ffed047e5 100644
--- a/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java
+++ b/commonmark/src/main/java/org/commonmark/internal/util/LinkScanner.java
@@ -1,69 +1,76 @@
package org.commonmark.internal.util;
+import org.commonmark.parser.beta.Scanner;
+
public class LinkScanner {
/**
- * Attempt to scan the contents of a link label (inside the brackets), returning the position after the content or
- * -1. The returned position can either be the closing {@code ]}, or the end of the line if the label continues on
+ * Attempt to scan the contents of a link label (inside the brackets), stopping after the content or returning false.
+ * The stopped position can be either the closing {@code ]}, or the end of the line if the label continues on
* the next line.
*/
- public static int scanLinkLabelContent(CharSequence input, int start) {
- for (int i = start; i < input.length(); i++) {
- char c = input.charAt(i);
- switch (c) {
+ public static boolean scanLinkLabelContent(Scanner scanner) {
+ while (scanner.hasNext()) {
+ switch (scanner.peek()) {
case '\\':
- if (Parsing.isEscapable(input, i + 1)) {
- i += 1;
+ scanner.next();
+ if (isEscapable(scanner.peek())) {
+ scanner.next();
}
break;
case ']':
- return i;
+ return true;
case '[':
// spec: Unescaped square bracket characters are not allowed inside the opening and closing
// square brackets of link labels.
- return -1;
+ return false;
+ default:
+ scanner.next();
}
}
- return input.length();
+ return true;
}
/**
- * Attempt to scan a link destination, returning the position after the destination or -1.
+ * Attempt to scan a link destination, stopping after the destination or returning false.
*/
- public static int scanLinkDestination(CharSequence input, int start) {
- if (start >= input.length()) {
- return -1;
+ public static boolean scanLinkDestination(Scanner scanner) {
+ if (!scanner.hasNext()) {
+ return false;
}
- if (input.charAt(start) == '<') {
- for (int i = start + 1; i < input.length(); i++) {
- char c = input.charAt(i);
- switch (c) {
+ if (scanner.next('<')) {
+ while (scanner.hasNext()) {
+ switch (scanner.peek()) {
case '\\':
- if (Parsing.isEscapable(input, i + 1)) {
- i += 1;
+ scanner.next();
+ if (isEscapable(scanner.peek())) {
+ scanner.next();
}
break;
case '\n':
case '<':
- return -1;
+ return false;
case '>':
- return i + 1;
+ scanner.next();
+ return true;
+ default:
+ scanner.next();
}
}
- return -1;
+ return false;
} else {
- return scanLinkDestinationWithBalancedParens(input, start);
+ return scanLinkDestinationWithBalancedParens(scanner);
}
}
- public static int scanLinkTitle(CharSequence input, int start) {
- if (start >= input.length()) {
- return -1;
+ public static boolean scanLinkTitle(Scanner scanner) {
+ if (!scanner.hasNext()) {
+ return false;
}
char endDelimiter;
- switch (input.charAt(start)) {
+ switch (scanner.peek()) {
case '"':
endDelimiter = '"';
break;
@@ -74,75 +81,122 @@ public static int scanLinkTitle(CharSequence input, int start) {
endDelimiter = ')';
break;
default:
- return -1;
+ return false;
}
+ scanner.next();
- int afterContent = scanLinkTitleContent(input, start + 1, endDelimiter);
- if (afterContent == -1) {
- return -1;
+ if (!scanLinkTitleContent(scanner, endDelimiter)) {
+ return false;
}
-
- if (afterContent >= input.length() || input.charAt(afterContent) != endDelimiter) {
- // missing or wrong end delimiter
- return -1;
+ if (!scanner.hasNext()) {
+ return false;
}
-
- return afterContent + 1;
+ scanner.next();
+ return true;
}
- public static int scanLinkTitleContent(CharSequence input, int start, char endDelimiter) {
- for (int i = start; i < input.length(); i++) {
- char c = input.charAt(i);
- if (c == '\\' && Parsing.isEscapable(input, i + 1)) {
- i += 1;
+ public static boolean scanLinkTitleContent(Scanner scanner, char endDelimiter) {
+ while (scanner.hasNext()) {
+ char c = scanner.peek();
+ if (c == '\\') {
+ scanner.next();
+ if (isEscapable(scanner.peek())) {
+ scanner.next();
+ }
} else if (c == endDelimiter) {
- return i;
+ return true;
} else if (endDelimiter == ')' && c == '(') {
// unescaped '(' in title within parens is invalid
- return -1;
+ return false;
+ } else {
+ scanner.next();
}
}
- return input.length();
+ return true;
}
// spec: a nonempty sequence of characters that does not start with <, does not include ASCII space or control
// characters, and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a balanced
// pair of unescaped parentheses
- private static int scanLinkDestinationWithBalancedParens(CharSequence input, int start) {
+ private static boolean scanLinkDestinationWithBalancedParens(Scanner scanner) {
int parens = 0;
- for (int i = start; i < input.length(); i++) {
- char c = input.charAt(i);
+ boolean empty = true;
+ while (scanner.hasNext()) {
+ char c = scanner.peek();
switch (c) {
- case '\0':
case ' ':
- return i != start ? i : -1;
+ return !empty;
case '\\':
- if (Parsing.isEscapable(input, i + 1)) {
- i += 1;
+ scanner.next();
+ if (isEscapable(scanner.peek())) {
+ scanner.next();
}
break;
case '(':
parens++;
// Limit to 32 nested parens for pathological cases
if (parens > 32) {
- return -1;
+ return false;
}
+ scanner.next();
break;
case ')':
if (parens == 0) {
- return i;
+ return true;
} else {
parens--;
}
+ scanner.next();
break;
default:
// or control character
if (Character.isISOControl(c)) {
- return i != start ? i : -1;
+ return !empty;
}
+ scanner.next();
break;
}
+ empty = false;
+ }
+ return true;
+ }
+
+ private static boolean isEscapable(char c) {
+ switch (c) {
+ case '!':
+ case '"':
+ case '#':
+ case '$':
+ case '%':
+ case '&':
+ case '\'':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case ',':
+ case '-':
+ case '.':
+ case '/':
+ case ':':
+ case ';':
+ case '<':
+ case '=':
+ case '>':
+ case '?':
+ case '@':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '_':
+ case '`':
+ case '{':
+ case '|':
+ case '}':
+ case '~':
+ return true;
}
- return input.length();
+ return false;
}
}
diff --git a/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java b/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java
index d429d9db0..972fdef62 100644
--- a/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java
+++ b/commonmark/src/main/java/org/commonmark/internal/util/Parsing.java
@@ -1,208 +1,10 @@
package org.commonmark.internal.util;
public class Parsing {
-
- private static final String TAGNAME = "[A-Za-z][A-Za-z0-9-]*";
- private static final String ATTRIBUTENAME = "[a-zA-Z_:][a-zA-Z0-9:._-]*";
- private static final String UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+";
- private static final String SINGLEQUOTEDVALUE = "'[^']*'";
- private static final String DOUBLEQUOTEDVALUE = "\"[^\"]*\"";
- private static final String ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE
- + "|" + DOUBLEQUOTEDVALUE + ")";
- private static final String ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE
- + ")";
- private static final String ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC
- + "?)";
-
- public static final String OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
- public static final String CLOSETAG = "" + TAGNAME + "\\s*[>]";
-
public static int CODE_BLOCK_INDENT = 4;
public static int columnsToNextTabStop(int column) {
// Tab stop is 4
return 4 - (column % 4);
}
-
- public static int find(char c, CharSequence s, int startIndex) {
- int length = s.length();
- for (int i = startIndex; i < length; i++) {
- if (s.charAt(i) == c) {
- return i;
- }
- }
- return -1;
- }
-
- public static int findLineBreak(CharSequence s, int startIndex) {
- int length = s.length();
- for (int i = startIndex; i < length; i++) {
- switch (s.charAt(i)) {
- case '\n':
- case '\r':
- return i;
- }
- }
- return -1;
- }
-
- public static boolean isBlank(CharSequence s) {
- return findNonSpace(s, 0) == -1;
- }
-
- public static boolean hasNonSpace(CharSequence s) {
- int length = s.length();
- int skipped = skip(' ', s, 0, length);
- return skipped != length;
- }
-
- public static boolean isLetter(CharSequence s, int index) {
- int codePoint = Character.codePointAt(s, index);
- return Character.isLetter(codePoint);
- }
-
- public static boolean isSpaceOrTab(CharSequence s, int index) {
- if (index < s.length()) {
- switch (s.charAt(index)) {
- case ' ':
- case '\t':
- return true;
- }
- }
- return false;
- }
-
- public static boolean isEscapable(CharSequence s, int index) {
- if (index < s.length()) {
- switch (s.charAt(index)) {
- case '!':
- case '"':
- case '#':
- case '$':
- case '%':
- case '&':
- case '\'':
- case '(':
- case ')':
- case '*':
- case '+':
- case ',':
- case '-':
- case '.':
- case '/':
- case ':':
- case ';':
- case '<':
- case '=':
- case '>':
- case '?':
- case '@':
- case '[':
- case '\\':
- case ']':
- case '^':
- case '_':
- case '`':
- case '{':
- case '|':
- case '}':
- case '~':
- return true;
- }
- }
- return false;
- }
-
- /**
- * Prepares the input line replacing {@code \0}
- */
- public static CharSequence prepareLine(CharSequence line) {
- // Avoid building a new string in the majority of cases (no \0)
- StringBuilder sb = null;
- int length = line.length();
- for (int i = 0; i < length; i++) {
- char c = line.charAt(i);
- switch (c) {
- case '\0':
- if (sb == null) {
- sb = new StringBuilder(length);
- sb.append(line, 0, i);
- }
- sb.append('\uFFFD');
- break;
- default:
- if (sb != null) {
- sb.append(c);
- }
- }
- }
-
- if (sb != null) {
- return sb.toString();
- } else {
- return line;
- }
- }
-
- public static int skip(char skip, CharSequence s, int startIndex, int endIndex) {
- for (int i = startIndex; i < endIndex; i++) {
- if (s.charAt(i) != skip) {
- return i;
- }
- }
- return endIndex;
- }
-
- public static int skipBackwards(char skip, CharSequence s, int startIndex, int lastIndex) {
- for (int i = startIndex; i >= lastIndex; i--) {
- if (s.charAt(i) != skip) {
- return i;
- }
- }
- return lastIndex - 1;
- }
-
- public static int skipSpaceTab(CharSequence s, int startIndex, int endIndex) {
- for (int i = startIndex; i < endIndex; i++) {
- switch (s.charAt(i)) {
- case ' ':
- case '\t':
- break;
- default:
- return i;
- }
- }
- return endIndex;
- }
-
- public static int skipSpaceTabBackwards(CharSequence s, int startIndex, int lastIndex) {
- for (int i = startIndex; i >= lastIndex; i--) {
- switch (s.charAt(i)) {
- case ' ':
- case '\t':
- break;
- default:
- return i;
- }
- }
- return lastIndex - 1;
- }
-
- private static int findNonSpace(CharSequence s, int startIndex) {
- int length = s.length();
- for (int i = startIndex; i < length; i++) {
- switch (s.charAt(i)) {
- case ' ':
- case '\t':
- case '\n':
- case '\u000B':
- case '\f':
- case '\r':
- break;
- default:
- return i;
- }
- }
- return -1;
- }
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Block.java b/commonmark/src/main/java/org/commonmark/node/Block.java
index e6a317d7c..332346b0e 100644
--- a/commonmark/src/main/java/org/commonmark/node/Block.java
+++ b/commonmark/src/main/java/org/commonmark/node/Block.java
@@ -1,7 +1,11 @@
package org.commonmark.node;
+/**
+ * Block nodes such as paragraphs, list blocks, code blocks etc.
+ */
public abstract class Block extends Node {
+ @Override
public Block getParent() {
return (Block) super.getParent();
}
diff --git a/commonmark/src/main/java/org/commonmark/node/BlockQuote.java b/commonmark/src/main/java/org/commonmark/node/BlockQuote.java
index 160f25ae2..f68252398 100644
--- a/commonmark/src/main/java/org/commonmark/node/BlockQuote.java
+++ b/commonmark/src/main/java/org/commonmark/node/BlockQuote.java
@@ -1,5 +1,15 @@
package org.commonmark.node;
+/**
+ * A block quote, e.g.:
+ *
+ * > Some quoted text
+ *
+ *
+ * Note that child nodes are themselves blocks, e.g. {@link Paragraph}, {@link ListBlock} etc.
+ *
+ * @see CommonMark Spec
+ */
public class BlockQuote extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/BulletList.java b/commonmark/src/main/java/org/commonmark/node/BulletList.java
index 127862312..014f4d3b2 100644
--- a/commonmark/src/main/java/org/commonmark/node/BulletList.java
+++ b/commonmark/src/main/java/org/commonmark/node/BulletList.java
@@ -1,20 +1,50 @@
package org.commonmark.node;
+/**
+ * A bullet list, e.g.:
+ *
+ * - One
+ * - Two
+ * - Three
+ *
+ *
+ * The children are {@link ListItem} blocks, which contain other blocks (or nested lists).
+ *
+ * @see CommonMark Spec: List items
+ */
public class BulletList extends ListBlock {
- private char bulletMarker;
+ private String marker;
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+ /**
+ * @return the bullet list marker that was used, e.g. {@code -}, {@code *} or {@code +}, if available, or null otherwise
+ */
+ public String getMarker() {
+ return marker;
+ }
+
+ public void setMarker(String marker) {
+ this.marker = marker;
+ }
+
+ /**
+ * @deprecated use {@link #getMarker()} instead
+ */
+ @Deprecated
public char getBulletMarker() {
- return bulletMarker;
+ return marker != null && !marker.isEmpty() ? marker.charAt(0) : '\0';
}
+ /**
+ * @deprecated use {@link #getMarker()} instead
+ */
+ @Deprecated
public void setBulletMarker(char bulletMarker) {
- this.bulletMarker = bulletMarker;
+ this.marker = bulletMarker != '\0' ? String.valueOf(bulletMarker) : null;
}
-
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Code.java b/commonmark/src/main/java/org/commonmark/node/Code.java
index 0b47ecb71..3b79e0c9c 100644
--- a/commonmark/src/main/java/org/commonmark/node/Code.java
+++ b/commonmark/src/main/java/org/commonmark/node/Code.java
@@ -1,5 +1,13 @@
package org.commonmark.node;
+/**
+ * Inline code span, e.g.:
+ *
+ * Some `inline code`
+ *
+ *
+ * @see CommonMark Spec
+ */
public class Code extends Node {
private String literal;
@@ -16,6 +24,10 @@ public void accept(Visitor visitor) {
visitor.visit(this);
}
+ /**
+ * @return the literal text in the code span (note that it's not necessarily the raw text between backticks,
+ * e.g. when spaces are stripped)
+ */
public String getLiteral() {
return literal;
}
diff --git a/commonmark/src/main/java/org/commonmark/node/CustomBlock.java b/commonmark/src/main/java/org/commonmark/node/CustomBlock.java
index 6596ec1a0..cad88933a 100644
--- a/commonmark/src/main/java/org/commonmark/node/CustomBlock.java
+++ b/commonmark/src/main/java/org/commonmark/node/CustomBlock.java
@@ -1,5 +1,8 @@
package org.commonmark.node;
+/**
+ * A block that extensions can subclass to define custom blocks (not part of the core specification).
+ */
public abstract class CustomBlock extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/CustomNode.java b/commonmark/src/main/java/org/commonmark/node/CustomNode.java
index a68e5cc11..88f0254da 100644
--- a/commonmark/src/main/java/org/commonmark/node/CustomNode.java
+++ b/commonmark/src/main/java/org/commonmark/node/CustomNode.java
@@ -1,5 +1,8 @@
package org.commonmark.node;
+/**
+ * A node that extensions can subclass to define custom nodes (not part of the core specification).
+ */
public abstract class CustomNode extends Node {
@Override
public void accept(Visitor visitor) {
diff --git a/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java
new file mode 100644
index 000000000..59cb88274
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/node/DefinitionMap.java
@@ -0,0 +1,67 @@
+package org.commonmark.node;
+
+import org.commonmark.internal.util.Escaping;
+
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A map that can be used to store and look up reference definitions by a label. The labels are case-insensitive and
+ * normalized, the same way as for {@link LinkReferenceDefinition} nodes.
+ *
+ * @param <D> the type of value
+ */
+public class DefinitionMap<D> {
+
+    private final Class<D> type;
+    // LinkedHashMap for determinism and to preserve document order
+    private final Map<String, D> definitions = new LinkedHashMap<>();
+
+    public DefinitionMap(Class<D> type) {
+        this.type = type;
+    }
+
+    public Class<D> getType() {
+        return type;
+    }
+
+    public void addAll(DefinitionMap<D> that) {
+        for (var entry : that.definitions.entrySet()) {
+            // Note that keys are already normalized, so we can add them directly
+            definitions.putIfAbsent(entry.getKey(), entry.getValue());
+        }
+    }
+
+    /**
+     * Store a new definition unless one is already in the map. If there is no definition for that label yet, return null.
+     * Otherwise, return the existing definition.
+     * <p>
+     * The label is normalized by the definition map before storing.
+     */
+    public D putIfAbsent(String label, D definition) {
+        String normalizedLabel = Escaping.normalizeLabelContent(label);
+
+        // spec: When there are multiple matching link reference definitions, the first is used
+        return definitions.putIfAbsent(normalizedLabel, definition);
+    }
+
+    /**
+     * Look up a definition by label. The label is normalized by the definition map before lookup.
+     *
+     * @return the value or null
+     */
+    public D get(String label) {
+        String normalizedLabel = Escaping.normalizeLabelContent(label);
+        return definitions.get(normalizedLabel);
+    }
+
+    public Set<String> keySet() {
+        return definitions.keySet();
+    }
+
+    public Collection<D> values() {
+        return definitions.values();
+    }
+}
diff --git a/commonmark/src/main/java/org/commonmark/node/Document.java b/commonmark/src/main/java/org/commonmark/node/Document.java
index 5b7e74189..b4968c206 100644
--- a/commonmark/src/main/java/org/commonmark/node/Document.java
+++ b/commonmark/src/main/java/org/commonmark/node/Document.java
@@ -1,5 +1,8 @@
package org.commonmark.node;
+/**
+ * The root block of a document, containing the top-level blocks.
+ */
public class Document extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/Emphasis.java b/commonmark/src/main/java/org/commonmark/node/Emphasis.java
index 9877e7b63..5efc8c327 100644
--- a/commonmark/src/main/java/org/commonmark/node/Emphasis.java
+++ b/commonmark/src/main/java/org/commonmark/node/Emphasis.java
@@ -1,5 +1,13 @@
package org.commonmark.node;
+/**
+ * Emphasis, e.g.:
+ *
+ *
+ * @see CommonMark Spec
+ */
public class IndentedCodeBlock extends Block {
private String literal;
diff --git a/commonmark/src/main/java/org/commonmark/node/Link.java b/commonmark/src/main/java/org/commonmark/node/Link.java
index b2ed8c2a1..4edc7f676 100644
--- a/commonmark/src/main/java/org/commonmark/node/Link.java
+++ b/commonmark/src/main/java/org/commonmark/node/Link.java
@@ -18,7 +18,7 @@
* Note that the text in the link can contain inline formatting, so it could also contain an {@link Image} or
* {@link Emphasis}, etc.
*
- * @see CommonMark Spec for links
+ * @see CommonMark Spec
*/
public class Link extends Node {
@@ -46,6 +46,9 @@ public void setDestination(String destination) {
this.destination = destination;
}
+ /**
+ * @return the title or null
+ */
public String getTitle() {
return title;
}
diff --git a/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java b/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java
index 3f8bfd0f0..b866781f0 100644
--- a/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java
+++ b/commonmark/src/main/java/org/commonmark/node/LinkReferenceDefinition.java
@@ -9,9 +9,9 @@
* They can be referenced anywhere else in the document to produce a link using [foo]. The definitions
* themselves are usually not rendered in the final output.
*
- * @see Link reference definitions
+ * @see CommonMark Spec
*/
-public class LinkReferenceDefinition extends Node {
+public class LinkReferenceDefinition extends Block {
private String label;
private String destination;
diff --git a/commonmark/src/main/java/org/commonmark/node/ListBlock.java b/commonmark/src/main/java/org/commonmark/node/ListBlock.java
index 69482f66e..1290bc622 100644
--- a/commonmark/src/main/java/org/commonmark/node/ListBlock.java
+++ b/commonmark/src/main/java/org/commonmark/node/ListBlock.java
@@ -1,12 +1,15 @@
package org.commonmark.node;
+/**
+ * A list block like {@link BulletList} or {@link OrderedList}.
+ */
public abstract class ListBlock extends Block {
private boolean tight;
/**
* @return whether this list is tight or loose
- * @see CommonMark Spec for tight lists
+ * @see CommonMark Spec for tight lists
*/
public boolean isTight() {
return tight;
diff --git a/commonmark/src/main/java/org/commonmark/node/ListItem.java b/commonmark/src/main/java/org/commonmark/node/ListItem.java
index aa526be01..c4d1214e7 100644
--- a/commonmark/src/main/java/org/commonmark/node/ListItem.java
+++ b/commonmark/src/main/java/org/commonmark/node/ListItem.java
@@ -1,9 +1,78 @@
package org.commonmark.node;
+/**
+ * A child of a {@link ListBlock}, containing other blocks (e.g. {@link Paragraph}, other lists, etc).
+ *
+ * Note that a list item can't directly contain {@link Text}, it needs to be:
+ * {@link ListItem} : {@link Paragraph} : {@link Text}.
+ * If you want a list that is rendered tightly, create a list with {@link ListBlock#setTight(boolean)}.
+ *
+ * @see CommonMark Spec: List items
+ */
public class ListItem extends Block {
+ private Integer markerIndent;
+ private Integer contentIndent;
+
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+
+ /**
+ * Returns the indent of the marker such as "-" or "1." in columns (spaces or tab stop of 4) if available, or null
+ * otherwise.
+ *
+ * Some examples and their marker indent:
+ *
- Foo
+ * Marker indent: 0
+ *
- Foo
+ * Marker indent: 1
+ *
1. Foo
+ * Marker indent: 2
+ */
+ public Integer getMarkerIndent() {
+ return markerIndent;
+ }
+
+ public void setMarkerIndent(Integer markerIndent) {
+ this.markerIndent = markerIndent;
+ }
+
+ /**
+ * Returns the indent of the content in columns (spaces or tab stop of 4) if available, or null otherwise.
+ * The content indent is counted from the beginning of the line and includes the marker on the first line.
+ *
+ * Some examples and their content indent:
+ *
- Foo
+ * Content indent: 2
+ *
- Foo
+ * Content indent: 3
+ *
1. Foo
+ * Content indent: 5
+ *
+ * Note that subsequent lines in the same list item need to be indented by at least the content indent to be counted
+ * as part of the list item.
+ */
+ public Integer getContentIndent() {
+ return contentIndent;
+ }
+
+ public void setContentIndent(Integer contentIndent) {
+ this.contentIndent = contentIndent;
+ }
+
+ /**
+ * @deprecated list items should only contain block nodes; if you're trying to create a list that is rendered
+ * without paragraphs, use {@link ListBlock#setTight(boolean)} instead.
+ */
+ @Override
+ @Deprecated
+ public void appendChild(Node child) {
+ super.appendChild(child);
+ }
+
+ public void appendChild(Block child) {
+ super.appendChild(child);
+ }
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Node.java b/commonmark/src/main/java/org/commonmark/node/Node.java
index e7b24c08c..d95a72c60 100644
--- a/commonmark/src/main/java/org/commonmark/node/Node.java
+++ b/commonmark/src/main/java/org/commonmark/node/Node.java
@@ -1,5 +1,14 @@
package org.commonmark.node;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * The base class of all CommonMark AST nodes ({@link Block} and inlines).
+ *
+ * A node can have multiple children, and a parent (except for the root node).
+ */
public abstract class Node {
private Node parent = null;
@@ -7,6 +16,7 @@ public abstract class Node {
private Node lastChild = null;
private Node prev = null;
private Node next = null;
+ private List sourceSpans = null;
public abstract void accept(Visitor visitor);
@@ -76,6 +86,9 @@ public void unlink() {
this.prev = null;
}
+ /**
+ * Inserts the {@code sibling} node after {@code this} node.
+ */
public void insertAfter(Node sibling) {
sibling.unlink();
sibling.next = this.next;
@@ -90,6 +103,9 @@ public void insertAfter(Node sibling) {
}
}
+ /**
+ * Inserts the {@code sibling} node before {@code this} node.
+ */
public void insertBefore(Node sibling) {
sibling.unlink();
sibling.prev = this.prev;
@@ -104,6 +120,41 @@ public void insertBefore(Node sibling) {
}
}
+    /**
+     * Returns the source spans recorded for this node.
+     * <p>
+     * When spans are present, the result is an unmodifiable view of the internal list; use
+     * {@link #setSourceSpans} or {@link #addSourceSpan} to change them.
+     *
+     * @return the source spans of this node if included by the parser, an empty list otherwise
+     * @since 0.16.0
+     */
+    public List<SourceSpan> getSourceSpans() {
+        return sourceSpans != null ? Collections.unmodifiableList(sourceSpans) : List.of();
+    }
+
+    /**
+     * Replace the current source spans with the provided list.
+     * <p>
+     * The list is copied, so later changes to the argument do not affect this node. An empty list clears the
+     * stored spans.
+     *
+     * @param sourceSpans the new source spans to set
+     * @since 0.16.0
+     */
+    public void setSourceSpans(List<SourceSpan> sourceSpans) {
+        if (sourceSpans.isEmpty()) {
+            this.sourceSpans = null;
+        } else {
+            this.sourceSpans = new ArrayList<>(sourceSpans);
+        }
+    }
+
+    /**
+     * Append one source span after the spans already recorded for this node.
+     *
+     * @param sourceSpan the source span to add
+     * @since 0.16.0
+     */
+    public void addSourceSpan(SourceSpan sourceSpan) {
+        // The backing list is only created once the first span arrives.
+        List<SourceSpan> spans = this.sourceSpans;
+        if (spans == null) {
+            spans = new ArrayList<>();
+            this.sourceSpans = spans;
+        }
+        spans.add(sourceSpan);
+    }
+
@Override
public String toString() {
return getClass().getSimpleName() + "{" + toStringAttributes() + "}";
diff --git a/commonmark/src/main/java/org/commonmark/node/Nodes.java b/commonmark/src/main/java/org/commonmark/node/Nodes.java
new file mode 100644
index 000000000..22d5932af
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/node/Nodes.java
@@ -0,0 +1,76 @@
+package org.commonmark.node;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Utility class for working with multiple {@link Node}s.
+ *
+ * @since 0.16.0
+ */
+public class Nodes {
+
+    private Nodes() {
+    }
+
+    /**
+     * The nodes between (not including) start and end.
+     *
+     * @param start the node after which iteration begins; it is not part of the result
+     * @param end the node at which iteration stops; it is not part of the result. Iteration also stops when
+     * {@link Node#getNext()} returns null before {@code end} is reached.
+     * @return an iterable over the nodes following {@code start}, up to but excluding {@code end}
+     */
+    public static Iterable<Node> between(Node start, Node end) {
+        return new NodeIterable(start.getNext(), end);
+    }
+
+    private static class NodeIterable implements Iterable<Node> {
+
+        private final Node first;
+        private final Node end;
+
+        private NodeIterable(Node first, Node end) {
+            this.first = first;
+            this.end = end;
+        }
+
+        @Override
+        public Iterator<Node> iterator() {
+            return new NodeIterator(first, end);
+        }
+    }
+
+    private static class NodeIterator implements Iterator<Node> {
+
+        private Node node;
+        private final Node end;
+
+        private NodeIterator(Node first, Node end) {
+            node = first;
+            this.end = end;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return node != null && node != end;
+        }
+
+        @Override
+        public Node next() {
+            // Iterator contract: signal exhaustion explicitly instead of failing with an NPE or handing out "end".
+            if (!hasNext()) {
+                throw new NoSuchElementException();
+            }
+            Node result = node;
+            node = node.getNext();
+            return result;
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("remove");
+        }
+    }
+}
+
diff --git a/commonmark/src/main/java/org/commonmark/node/OrderedList.java b/commonmark/src/main/java/org/commonmark/node/OrderedList.java
index 1f988234c..61f8902c0 100644
--- a/commonmark/src/main/java/org/commonmark/node/OrderedList.java
+++ b/commonmark/src/main/java/org/commonmark/node/OrderedList.java
@@ -1,29 +1,78 @@
package org.commonmark.node;
+/**
+ * An ordered list, e.g.:
+ * <pre><code>
+ * 1. One
+ * 2. Two
+ * 3. Three
+ * </code></pre>
+ * <p>
+ * The children are {@link ListItem} blocks, which contain other blocks (or nested lists).
+ *
+ * @see <a href="https://spec.commonmark.org/0.31.2/#list-items">CommonMark Spec: List items</a>
+ */
public class OrderedList extends ListBlock {
- private int startNumber;
- private char delimiter;
+ private String markerDelimiter;
+ private Integer markerStartNumber;
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+ /**
+ * @return the start number used in the marker, e.g. {@code 1}, if available, or null otherwise
+ */
+ public Integer getMarkerStartNumber() {
+ return markerStartNumber;
+ }
+
+ public void setMarkerStartNumber(Integer markerStartNumber) {
+ this.markerStartNumber = markerStartNumber;
+ }
+
+ /**
+ * @return the delimiter used in the marker, e.g. {@code .} or {@code )}, if available, or null otherwise
+ */
+ public String getMarkerDelimiter() {
+ return markerDelimiter;
+ }
+
+ public void setMarkerDelimiter(String markerDelimiter) {
+ this.markerDelimiter = markerDelimiter;
+ }
+
+ /**
+ * @deprecated use {@link #getMarkerStartNumber()} instead
+ */
+ @Deprecated
public int getStartNumber() {
- return startNumber;
+ return markerStartNumber != null ? markerStartNumber : 0;
}
+ /**
+ * @deprecated use {@link #setMarkerStartNumber} instead
+ */
+ @Deprecated
public void setStartNumber(int startNumber) {
- this.startNumber = startNumber;
+ this.markerStartNumber = startNumber != 0 ? startNumber : null;
}
+ /**
+ * @deprecated use {@link #getMarkerDelimiter()} instead
+ */
+ @Deprecated
public char getDelimiter() {
- return delimiter;
+ return markerDelimiter != null && !markerDelimiter.isEmpty() ? markerDelimiter.charAt(0) : '\0';
}
+ /**
+ * @deprecated use {@link #setMarkerDelimiter} instead
+ */
+ @Deprecated
public void setDelimiter(char delimiter) {
- this.delimiter = delimiter;
+ this.markerDelimiter = delimiter != '\0' ? String.valueOf(delimiter) : null;
}
-
}
diff --git a/commonmark/src/main/java/org/commonmark/node/Paragraph.java b/commonmark/src/main/java/org/commonmark/node/Paragraph.java
index 0c3f88f39..b298f1ce4 100644
--- a/commonmark/src/main/java/org/commonmark/node/Paragraph.java
+++ b/commonmark/src/main/java/org/commonmark/node/Paragraph.java
@@ -1,5 +1,10 @@
package org.commonmark.node;
+/**
+ * A paragraph block, contains inline nodes such as {@link Text}
+ *
+ * @see <a href="https://spec.commonmark.org/0.31.2/#paragraphs">CommonMark Spec</a>
+ */
public class Paragraph extends Block {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java b/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java
index e66458912..87445db56 100644
--- a/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java
+++ b/commonmark/src/main/java/org/commonmark/node/SoftLineBreak.java
@@ -1,5 +1,14 @@
package org.commonmark.node;
+/**
+ * A soft line break (as opposed to a {@link HardLineBreak}), e.g. between:
+ * <pre><code>
+ * foo
+ * bar
+ * </code></pre>
+ *
+ * @see <a href="https://spec.commonmark.org/0.31.2/#soft-line-breaks">CommonMark Spec</a>
+ */
public class SoftLineBreak extends Node {
@Override
diff --git a/commonmark/src/main/java/org/commonmark/node/SourceSpan.java b/commonmark/src/main/java/org/commonmark/node/SourceSpan.java
new file mode 100644
index 000000000..6558cc84a
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/node/SourceSpan.java
@@ -0,0 +1,178 @@
+package org.commonmark.node;
+
+import java.util.Objects;
+
+/**
+ * A source span references a snippet of text from the source input.
+ * <p>
+ * It has a starting position (line and column index) and a length of how many characters it spans.
+ * <p>
+ * For example, this CommonMark source text:
+ * <pre><code>
+ * &gt; foo
+ * </code></pre>
+ * The {@link BlockQuote} node would have this source span: line 0, column 0, length 5.
+ * <p>
+ * The {@link Paragraph} node inside it would have: line 0, column 2, length 3.
+ * <p>
+ * If a block has multiple lines, it will have a source span for each line.
+ * <p>
+ * Note that the column index and length are measured in Java characters (UTF-16 code units). If you're outputting them
+ * to be consumed by another programming language, e.g. one that uses UTF-8 strings, you will need to translate them,
+ * otherwise characters such as emojis will result in incorrect positions.
+ *
+ * @since 0.16.0
+ */
+public class SourceSpan {
+
+    private final int lineIndex;
+    private final int columnIndex;
+    private final int inputIndex;
+    private final int length;
+
+    /**
+     * Creates a source span from its position and length.
+     *
+     * @param line 0-based line index
+     * @param col 0-based column index on that line
+     * @param input 0-based index in the whole input
+     * @param length length of the span in characters
+     * @return the source span
+     * @throws IllegalArgumentException if any of the arguments is negative
+     */
+    public static SourceSpan of(int line, int col, int input, int length) {
+        return new SourceSpan(line, col, input, length);
+    }
+
+    /**
+     * @deprecated Use {@link #of(int, int, int, int)} instead to also specify input index. Using the deprecated one
+     * will set {@link #inputIndex} to 0.
+     */
+    @Deprecated
+    public static SourceSpan of(int lineIndex, int columnIndex, int length) {
+        return of(lineIndex, columnIndex, 0, length);
+    }
+
+    private SourceSpan(int lineIndex, int columnIndex, int inputIndex, int length) {
+        if (lineIndex < 0) {
+            throw new IllegalArgumentException("lineIndex " + lineIndex + " must be >= 0");
+        }
+        if (columnIndex < 0) {
+            throw new IllegalArgumentException("columnIndex " + columnIndex + " must be >= 0");
+        }
+        if (inputIndex < 0) {
+            throw new IllegalArgumentException("inputIndex " + inputIndex + " must be >= 0");
+        }
+        if (length < 0) {
+            throw new IllegalArgumentException("length " + length + " must be >= 0");
+        }
+        this.lineIndex = lineIndex;
+        this.columnIndex = columnIndex;
+        this.inputIndex = inputIndex;
+        this.length = length;
+    }
+
+    /**
+     * @return 0-based line index, e.g. 0 for first line, 1 for the second line, etc
+     */
+    public int getLineIndex() {
+        return lineIndex;
+    }
+
+    /**
+     * @return 0-based index of column (character on line) in source, e.g. 0 for the first character of a line, 1 for
+     * the second character, etc
+     */
+    public int getColumnIndex() {
+        return columnIndex;
+    }
+
+    /**
+     * @return 0-based index in whole input
+     * @since 0.24.0
+     */
+    public int getInputIndex() {
+        return inputIndex;
+    }
+
+    /**
+     * @return length of the span in characters
+     */
+    public int getLength() {
+        return length;
+    }
+
+    /**
+     * Returns the part of this span that starts at {@code beginIndex} and runs to the end of this span.
+     *
+     * @param beginIndex start offset relative to the start of this span, inclusive
+     * @return the sub span, or {@code this} if {@code beginIndex} is 0
+     * @throws IndexOutOfBoundsException if {@code beginIndex} is negative or greater than the length of this span
+     */
+    public SourceSpan subSpan(int beginIndex) {
+        return subSpan(beginIndex, length);
+    }
+
+    /**
+     * Returns the part of this span between {@code beginIndex} (inclusive) and {@code endIndex} (exclusive).
+     * Both offsets are relative to the start of this span. The result keeps the line index, while its column
+     * index and input index are shifted by {@code beginIndex}.
+     *
+     * @param beginIndex start offset relative to the start of this span, inclusive
+     * @param endIndex end offset relative to the start of this span, exclusive
+     * @return the sub span, or {@code this} if the range covers the whole span
+     * @throws IndexOutOfBoundsException if an offset is negative or greater than the length of this span, or if
+     * {@code beginIndex} is greater than {@code endIndex}
+     */
+    public SourceSpan subSpan(int beginIndex, int endIndex) {
+        if (beginIndex < 0) {
+            throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be >= 0");
+        }
+        if (beginIndex > length) {
+            throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= length " + length);
+        }
+        if (endIndex < 0) {
+            throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be >= 0");
+        }
+        if (endIndex > length) {
+            throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be <= length " + length);
+        }
+        if (beginIndex > endIndex) {
+            throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= endIndex " + endIndex);
+        }
+        if (beginIndex == 0 && endIndex == length) {
+            return this;
+        }
+        return new SourceSpan(lineIndex, columnIndex + beginIndex, inputIndex + beginIndex, endIndex - beginIndex);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        SourceSpan that = (SourceSpan) o;
+        return lineIndex == that.lineIndex &&
+                columnIndex == that.columnIndex &&
+                inputIndex == that.inputIndex &&
+                length == that.length;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(lineIndex, columnIndex, inputIndex, length);
+    }
+
+    @Override
+    public String toString() {
+        return "SourceSpan{" +
+                "line=" + lineIndex +
+                ", column=" + columnIndex +
+                ", input=" + inputIndex +
+                ", length=" + length +
+                "}";
+    }
+}
diff --git a/commonmark/src/main/java/org/commonmark/node/SourceSpans.java b/commonmark/src/main/java/org/commonmark/node/SourceSpans.java
new file mode 100644
index 000000000..975d7fbdb
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/node/SourceSpans.java
@@ -0,0 +1,72 @@
+package org.commonmark.node;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A list of source spans that can be added to. Takes care of merging adjacent source spans.
+ *
+ * @since 0.16.0
+ */
+public class SourceSpans {
+
+    private List<SourceSpan> sourceSpans;
+
+    /**
+     * @return a new instance without any source spans
+     */
+    public static SourceSpans empty() {
+        return new SourceSpans();
+    }
+
+    /**
+     * @return the source spans collected so far (the backing list, not a copy), or an empty list if none were added
+     */
+    public List<SourceSpan> getSourceSpans() {
+        return sourceSpans != null ? sourceSpans : List.of();
+    }
+
+    /**
+     * Adds the source spans of each of the given nodes, in iteration order.
+     *
+     * @param nodes the nodes whose source spans should be added
+     */
+    public void addAllFrom(Iterable<? extends Node> nodes) {
+        for (Node node : nodes) {
+            addAll(node.getSourceSpans());
+        }
+    }
+
+    /**
+     * Adds the given source spans to the end of this list.
+     * <p>
+     * If the first new span starts at the input index where the last existing span ends, the two are merged into
+     * one span instead of being stored separately.
+     *
+     * @param other the source spans to add; an empty list is a no-op
+     */
+    public void addAll(List<SourceSpan> other) {
+        if (other.isEmpty()) {
+            return;
+        }
+
+        if (sourceSpans == null) {
+            sourceSpans = new ArrayList<>();
+        }
+
+        if (sourceSpans.isEmpty()) {
+            sourceSpans.addAll(other);
+        } else {
+            int lastIndex = sourceSpans.size() - 1;
+            SourceSpan a = sourceSpans.get(lastIndex);
+            SourceSpan b = other.get(0);
+            // Adjacent in the input: extend the last span by the length of the first new one.
+            if (a.getInputIndex() + a.getLength() == b.getInputIndex()) {
+                sourceSpans.set(lastIndex, SourceSpan.of(a.getLineIndex(), a.getColumnIndex(), a.getInputIndex(), a.getLength() + b.getLength()));
+                sourceSpans.addAll(other.subList(1, other.size()));
+            } else {
+                sourceSpans.addAll(other);
+            }
+        }
+    }
+}
diff --git a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
index dbff571cd..0dbeed3df 100644
--- a/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
+++ b/commonmark/src/main/java/org/commonmark/node/StrongEmphasis.java
@@ -1,5 +1,13 @@
package org.commonmark.node;
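+/**
+ * Strong emphasis, e.g.:
+ * <pre><code>
+ * Some **strong emphasis** or __strong emphasis__
+ * </code></pre>
+ *
+ * @see <a href="https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis">CommonMark Spec: Emphasis and strong emphasis</a>
+ */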
public class StrongEmphasis extends Node implements Delimited {
private String delimiter;
diff --git a/commonmark/src/main/java/org/commonmark/node/Text.java b/commonmark/src/main/java/org/commonmark/node/Text.java
index f16fc907b..9a04c41c1 100644
--- a/commonmark/src/main/java/org/commonmark/node/Text.java
+++ b/commonmark/src/main/java/org/commonmark/node/Text.java
@@ -1,5 +1,15 @@
package org.commonmark.node;
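+/**
+ * A text node, e.g. in:
+ * <pre><code>
+ * foo *bar*
+ * </code></pre>
+ * <p>
+ * The <code>foo </code> is a text node, and the <code>bar</code> inside the emphasis is also a text node.
+ *
+ * @see <a href="https://spec.commonmark.org/0.31.2/#textual-content">CommonMark Spec</a>
+ */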
public class Text extends Node {
private String literal;
diff --git a/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java b/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java
index f81abaa31..a31131e07 100644
--- a/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java
+++ b/commonmark/src/main/java/org/commonmark/node/ThematicBreak.java
@@ -1,9 +1,34 @@
package org.commonmark.node;
+/**
+ * A thematic break, e.g. between text:
+ * <pre><code>
+ * Some text
+ *
+ * ___
+ *
+ * Some other text.
+ * </code></pre>
+ *
+ * @see <a href="https://spec.commonmark.org/0.31.2/#thematic-breaks">CommonMark Spec</a>
+ */
public class ThematicBreak extends Block {
+ private String literal;
+
@Override
public void accept(Visitor visitor) {
visitor.visit(this);
}
+
+ /**
+ * @return the source literal that represents this node, if available
+ */
+ public String getLiteral() {
+ return literal;
+ }
+
+ public void setLiteral(String literal) {
+ this.literal = literal;
+ }
}
diff --git a/commonmark/src/main/java/org/commonmark/package-info.java b/commonmark/src/main/java/org/commonmark/package-info.java
index e3f0e0572..b683017f6 100644
--- a/commonmark/src/main/java/org/commonmark/package-info.java
+++ b/commonmark/src/main/java/org/commonmark/package-info.java
@@ -1,10 +1,10 @@
/**
* Root package of commonmark-java
- *
*
*
{@link org.commonmark.parser} for parsing input text to AST nodes
*
{@link org.commonmark.node} for AST node types and visitors
*
{@link org.commonmark.renderer.html} for HTML rendering
+ *
{@link org.commonmark.renderer.markdown} for Markdown rendering
*
*/
package org.commonmark;
diff --git a/commonmark/src/main/java/org/commonmark/parser/IncludeSourceSpans.java b/commonmark/src/main/java/org/commonmark/parser/IncludeSourceSpans.java
new file mode 100644
index 000000000..91d2b4e00
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/IncludeSourceSpans.java
@@ -0,0 +1,22 @@
+package org.commonmark.parser;
+
+/**
+ * Whether to include {@link org.commonmark.node.SourceSpan} or not while parsing,
+ * see {@link Parser.Builder#includeSourceSpans(IncludeSourceSpans)}.
+ *
+ * @since 0.16.0
+ */
+public enum IncludeSourceSpans {
+ /**
+ * Do not include source spans. This is the value a {@link Parser.Builder} starts with.
+ */
+ NONE,
+ /**
+ * Include source spans on {@link org.commonmark.node.Block} nodes only.
+ */
+ BLOCKS,
+ /**
+ * Include source spans on {@link org.commonmark.node.Block} nodes and on inline nodes.
+ */
+ BLOCKS_AND_INLINES,
+}
diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParser.java b/commonmark/src/main/java/org/commonmark/parser/InlineParser.java
index 492c3cc8a..49043a64f 100644
--- a/commonmark/src/main/java/org/commonmark/parser/InlineParser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/InlineParser.java
@@ -8,8 +8,8 @@
public interface InlineParser {
/**
- * @param input the content to parse as inline
+ * @param lines the source content to parse as inline
* @param node the node to append resulting nodes to (as children)
*/
- void parse(String input, Node node);
+ void parse(SourceLines lines, Node node);
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java
index 467742e2c..12007610b 100644
--- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java
+++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java
@@ -1,9 +1,12 @@
package org.commonmark.parser;
import org.commonmark.node.LinkReferenceDefinition;
+import org.commonmark.parser.beta.LinkProcessor;
+import org.commonmark.parser.beta.InlineContentParserFactory;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import java.util.List;
+import java.util.Set;
/**
* Context for inline parsing.
@@ -11,15 +14,47 @@
public interface InlineParserContext {
/**
- * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)}
+ * @return custom inline content parsers that have been configured with
+ * {@link Parser.Builder#customInlineContentParserFactory(InlineContentParserFactory)}
+ */
+ List<InlineContentParserFactory> getCustomInlineContentParserFactories();
+
+ /**
+ * @return custom delimiter processors that have been configured with
+ * {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)}
*/
List<DelimiterProcessor> getCustomDelimiterProcessors();
+ /**
+ * @return custom link processors that have been configured with {@link Parser.Builder#linkProcessor}.
+ */
+ List<LinkProcessor> getCustomLinkProcessors();
+
+ /**
+ * @return custom link markers that have been configured with {@link Parser.Builder#linkMarker}.
+ */
+ Set<Character> getCustomLinkMarkers();
+
/**
* Look up a {@link LinkReferenceDefinition} for a given label.
+ * <p>
+ * Note that the passed in label does not need to be normalized; implementations are responsible for doing the
+ * normalization before lookup.
*
* @param label the link label to look up
* @return the definition if one exists, {@code null} otherwise
+ * @deprecated use {@link #getDefinition} with {@link LinkReferenceDefinition} instead
*/
+ @Deprecated
LinkReferenceDefinition getLinkReferenceDefinition(String label);
+
+ /**
+ * Look up a definition of a type for a given label.
+ * <p>
+ * Note that the passed in label does not need to be normalized; implementations are responsible for doing the
+ * normalization before lookup.
+ *
+ * @return the definition if one exists, null otherwise
+ */
+ <D> D getDefinition(Class<D> type, String label);
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java
index 34c384a8a..c1640e9d8 100644
--- a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java
+++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java
@@ -4,5 +4,9 @@
* Factory for custom inline parser.
*/
public interface InlineParserFactory {
+
+ /**
+ * Create an {@link InlineParser} to use for parsing inlines. This is called once per parsed document.
+ */
InlineParser create(InlineParserContext inlineParserContext);
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java
index 5e15158ad..8faac789b 100644
--- a/commonmark/src/main/java/org/commonmark/parser/Parser.java
+++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java
@@ -1,19 +1,21 @@
package org.commonmark.parser;
import org.commonmark.Extension;
+import org.commonmark.internal.Definitions;
import org.commonmark.internal.DocumentParser;
import org.commonmark.internal.InlineParserContextImpl;
import org.commonmark.internal.InlineParserImpl;
import org.commonmark.node.*;
+import org.commonmark.parser.beta.LinkInfo;
+import org.commonmark.parser.beta.LinkProcessor;
+import org.commonmark.parser.beta.InlineContentParserFactory;
+import org.commonmark.parser.beta.LinkResult;
import org.commonmark.parser.block.BlockParserFactory;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import java.io.IOException;
import java.io.Reader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
/**
@@ -28,20 +30,31 @@
public class Parser {
private final List blockParserFactories;
+ private final List inlineContentParserFactories;
private final List delimiterProcessors;
+ private final List linkProcessors;
+ private final Set linkMarkers;
private final InlineParserFactory inlineParserFactory;
private final List postProcessors;
+ private final IncludeSourceSpans includeSourceSpans;
+ private final int maxOpenBlockParsers;
private Parser(Builder builder) {
this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes);
this.inlineParserFactory = builder.getInlineParserFactory();
this.postProcessors = builder.postProcessors;
+ this.inlineContentParserFactories = builder.inlineContentParserFactories;
this.delimiterProcessors = builder.delimiterProcessors;
+ this.linkProcessors = builder.linkProcessors;
+ this.linkMarkers = builder.linkMarkers;
+ this.includeSourceSpans = builder.includeSourceSpans;
+ this.maxOpenBlockParsers = builder.maxOpenBlockParsers;
// Try to construct an inline parser. Invalid configuration might result in an exception, which we want to
// detect as soon as possible.
- this.inlineParserFactory.create(new InlineParserContextImpl(delimiterProcessors,
- Collections.emptyMap()));
+ var context = new InlineParserContextImpl(
+ inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, new Definitions());
+ this.inlineParserFactory.create(context);
}
/**
@@ -62,9 +75,7 @@ public static Builder builder() {
* @return the root node
*/
public Node parse(String input) {
- if (input == null) {
- throw new NullPointerException("input must not be null");
- }
+ Objects.requireNonNull(input, "input must not be null");
DocumentParser documentParser = createDocumentParser();
Node document = documentParser.parse(input);
return postProcess(document);
@@ -89,17 +100,15 @@ public Node parse(String input) {
* @throws IOException when reading throws an exception
*/
public Node parseReader(Reader input) throws IOException {
- if (input == null) {
- throw new NullPointerException("input must not be null");
- }
-
+ Objects.requireNonNull(input, "input must not be null");
DocumentParser documentParser = createDocumentParser();
Node document = documentParser.parse(input);
return postProcess(document);
}
private DocumentParser createDocumentParser() {
- return new DocumentParser(blockParserFactories, inlineParserFactory, delimiterProcessors);
+ return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories,
+ delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers);
}
private Node postProcess(Node document) {
@@ -114,10 +123,15 @@ private Node postProcess(Node document) {
*/
public static class Builder {
private final List blockParserFactories = new ArrayList<>();
+ private final List inlineContentParserFactories = new ArrayList<>();
private final List delimiterProcessors = new ArrayList<>();
+ private final List linkProcessors = new ArrayList<>();
private final List postProcessors = new ArrayList<>();
+ private final Set linkMarkers = new HashSet<>();
private Set> enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes();
private InlineParserFactory inlineParserFactory;
+ private IncludeSourceSpans includeSourceSpans = IncludeSourceSpans.NONE;
+ private int maxOpenBlockParsers = Integer.MAX_VALUE;
/**
* @return the configured {@link Parser}
@@ -131,9 +145,7 @@ public Parser build() {
* @return {@code this}
*/
public Builder extensions(Iterable extends Extension> extensions) {
- if (extensions == null) {
- throw new NullPointerException("extensions must not be null");
- }
+ Objects.requireNonNull(extensions, "extensions must not be null");
for (Extension extension : extensions) {
if (extension instanceof ParserExtension) {
ParserExtension parserExtension = (ParserExtension) extension;
@@ -162,24 +174,58 @@ public Builder extensions(Iterable extends Extension> extensions) {
* E.g., to only parse headings and lists:
*
*
* @param enabledBlockTypes A list of block nodes the parser will parse.
- * If this list is empty, the parser will not recognize any CommonMark core features.
+ * If this list is empty, the parser will not recognize any CommonMark core features.
* @return {@code this}
*/
public Builder enabledBlockTypes(Set> enabledBlockTypes) {
- if (enabledBlockTypes == null) {
- throw new NullPointerException("enabledBlockTypes must not be null");
- }
+ Objects.requireNonNull(enabledBlockTypes, "enabledBlockTypes must not be null");
+ DocumentParser.checkEnabledBlockTypes(enabledBlockTypes);
this.enabledBlockTypes = enabledBlockTypes;
return this;
}
/**
- * Adds a custom block parser factory.
+ * Whether to calculate source positions for parsed {@link Node Nodes}, see {@link Node#getSourceSpans()}.
+ *
+ * By default, source spans are disabled.
+ *
+ * @param includeSourceSpans which kind of source spans should be included
+ * @return {@code this}
+ * @since 0.16.0
+ */
+        public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) {
+            // Reject null here, consistent with the other builder methods that validate their argument.
+            this.includeSourceSpans = Objects.requireNonNull(includeSourceSpans, "includeSourceSpans must not be null");
+            return this;
+        }
+
+        /**
+         * Cap the number of block parsers that may be open at the same time during parsing.
+         * <p>
+         * When the cap is reached, further block starts are handled as plain text rather than opening more deeply
+         * nested block structure.
+         * <p>
+         * The parser for the document root does not count towards the cap. The default is unlimited, so callers
+         * that keep using {@code Parser.builder().build()} see unchanged behavior.
+         *
+         * @param maxOpenBlockParsers maximum number of open non-document block parsers, must be zero or greater
+         * @return {@code this}
+         * @throws IllegalArgumentException if {@code maxOpenBlockParsers} is negative
+         */
+        public Builder maxOpenBlockParsers(int maxOpenBlockParsers) {
+            if (maxOpenBlockParsers >= 0) {
+                this.maxOpenBlockParsers = maxOpenBlockParsers;
+                return this;
+            }
+            throw new IllegalArgumentException("maxOpenBlockParsers must be >= 0");
+        }
+
+ /**
+ * Add a custom block parser factory.
*
* Note that custom factories are applied before the built-in factories. This is so that
* extensions can change how some syntax is parsed that would otherwise be handled by built-in factories.
@@ -189,35 +235,78 @@ public Builder enabledBlockTypes(Set> enabledBlockTypes)
* @return {@code this}
*/
public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) {
- if (blockParserFactory == null) {
- throw new NullPointerException("blockParserFactory must not be null");
- }
+ Objects.requireNonNull(blockParserFactory, "blockParserFactory must not be null");
blockParserFactories.add(blockParserFactory);
return this;
}
/**
- * Adds a custom delimiter processor.
+ * Add a factory for a custom inline content parser, for extending inline parsing or overriding built-in parsing.
+ *
+ * Note that parsers are triggered based on a special character as specified by
+ * {@link InlineContentParserFactory#getTriggerCharacters()}. It is possible to register multiple parsers for the same
+ * character, or even for some built-in special character such as {@code `}. The custom parsers are tried first
+ * in order in which they are registered, and then the built-in ones.
+ */
+        public Builder customInlineContentParserFactory(InlineContentParserFactory inlineContentParserFactory) {
+            // The message names the actual parameter, like the null checks in the other builder methods.
+            Objects.requireNonNull(inlineContentParserFactory, "inlineContentParserFactory must not be null");
+            inlineContentParserFactories.add(inlineContentParserFactory);
+            return this;
+        }
+
+ /**
+ * Add a custom delimiter processor for inline parsing.
*
* Note that multiple delimiter processors with the same characters can be added, as long as they have a
* different minimum length. In that case, the processor with the shortest matching length is used. Adding more
* than one delimiter processor with the same character and minimum length is invalid.
+ *
+ * If you want more control over how parsing is done, you might want to use
+ * {@link #customInlineContentParserFactory} instead.
*
* @param delimiterProcessor a delimiter processor implementation
* @return {@code this}
*/
public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
- if (delimiterProcessor == null) {
- throw new NullPointerException("delimiterProcessor must not be null");
- }
+ Objects.requireNonNull(delimiterProcessor, "delimiterProcessor must not be null");
delimiterProcessors.add(delimiterProcessor);
return this;
}
+        /**
+         * Register a custom link/image processor to be used during inline parsing.
+         * <p>
+         * Several link processors may be registered; they are tried in the order in which they were added. If none
+         * of them applies, the normal behavior applies, which means a link processor can override built-in link
+         * parsing.
+         *
+         * @param linkProcessor a link processor implementation
+         * @return {@code this}
+         */
+        public Builder linkProcessor(LinkProcessor linkProcessor) {
+            linkProcessors.add(Objects.requireNonNull(linkProcessor, "linkProcessor must not be null"));
+            return this;
+        }
+
+        /**
+         * Register a custom link marker for link processing. A link marker is a character such as {@code !} that
+         * changes the meaning of a link when it appears directly before the {@code [} of that link.
+         * <p>
+         * When a link marker followed by a valid link is parsed, the {@link org.commonmark.parser.beta.LinkInfo}
+         * handed to the {@link LinkProcessor} has its {@link LinkInfo#marker()} set. A link processor should check
+         * the {@link Text#getLiteral()} and then do any processing, and will probably want to use
+         * {@link LinkResult#includeMarker()}.
+         *
+         * @param linkMarker a link marker character
+         * @return {@code this}
+         */
+        public Builder linkMarker(Character linkMarker) {
+            linkMarkers.add(Objects.requireNonNull(linkMarker, "linkMarker must not be null"));
+            return this;
+        }
+
public Builder postProcessor(PostProcessor postProcessor) {
- if (postProcessor == null) {
- throw new NullPointerException("postProcessor must not be null");
- }
+ Objects.requireNonNull(postProcessor, "postProcessor must not be null");
postProcessors.add(postProcessor);
return this;
}
@@ -234,7 +323,6 @@ public Builder postProcessor(PostProcessor postProcessor) {
* link ([title](http://))
* image ()
*
- *
* Note that if this method is not called or the inline parser factory is set to null, then the default
* implementation will be used.
*
@@ -249,13 +337,9 @@ public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) {
private InlineParserFactory getInlineParserFactory() {
if (inlineParserFactory != null) {
return inlineParserFactory;
+ } else {
+ return InlineParserImpl::new;
}
- return new InlineParserFactory() {
- @Override
- public InlineParser create(InlineParserContext inlineParserContext) {
- return new InlineParserImpl(inlineParserContext);
- }
- };
}
}
diff --git a/commonmark/src/main/java/org/commonmark/parser/SourceLine.java b/commonmark/src/main/java/org/commonmark/parser/SourceLine.java
new file mode 100644
index 000000000..92a8cdfaf
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/SourceLine.java
@@ -0,0 +1,47 @@
+package org.commonmark.parser;
+
+import org.commonmark.node.SourceSpan;
+
+import java.util.Objects;
+
+/**
+ * A line or part of a line from the input source.
+ *
+ * @since 0.16.0
+ */
+public class SourceLine {
+    private final CharSequence content;
+    private final SourceSpan sourceSpan;
+
+    private SourceLine(CharSequence content, SourceSpan sourceSpan) {
+        this.content = Objects.requireNonNull(content, "content must not be null");
+        this.sourceSpan = sourceSpan;
+    }
+
+    /** Creates a line from {@code content} (must not be null) and a span, which may be {@code null}. */
+    public static SourceLine of(CharSequence content, SourceSpan sourceSpan) {
+        return new SourceLine(content, sourceSpan);
+    }
+
+    public CharSequence getContent() {
+        return content;
+    }
+
+    /** Returns the span given at creation; may be {@code null}. */
+    public SourceSpan getSourceSpan() {
+        return sourceSpan;
+    }
+
+    /** Slices {@code [beginIndex, endIndex)}; the span is shifted to match, or {@code null} if absent or the slice is empty. */
+    public SourceLine substring(int beginIndex, int endIndex) {
+        CharSequence slice = content.subSequence(beginIndex, endIndex);
+        int sliceLength = endIndex - beginIndex;
+        if (sourceSpan == null || sliceLength == 0) {
+            return SourceLine.of(slice, null);
+        }
+        int column = sourceSpan.getColumnIndex() + beginIndex;
+        int input = sourceSpan.getInputIndex() + beginIndex;
+        SourceSpan sliceSpan = SourceSpan.of(sourceSpan.getLineIndex(), column, input, sliceLength);
+        return SourceLine.of(slice, sliceSpan);
+    }
+}
diff --git a/commonmark/src/main/java/org/commonmark/parser/SourceLines.java b/commonmark/src/main/java/org/commonmark/parser/SourceLines.java
new file mode 100644
index 000000000..0b4290341
--- /dev/null
+++ b/commonmark/src/main/java/org/commonmark/parser/SourceLines.java
@@ -0,0 +1,66 @@
+package org.commonmark.parser;
+
+import org.commonmark.node.SourceSpan;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A set of lines ({@link SourceLine}) from the input source.
+ *
+ * @since 0.16.0
+ */
+public class SourceLines {
+
+ private final List lines = new ArrayList<>();
+
+ public static SourceLines empty() {
+ return new SourceLines();
+ }
+
+ public static SourceLines of(SourceLine sourceLine) {
+ SourceLines sourceLines = new SourceLines();
+ sourceLines.addLine(sourceLine);
+ return sourceLines;
+ }
+
+ public static SourceLines of(List